Example #1
    def joined(self):
        self.relations[0].to_publish_raw['unit_name'] = self._unit_name
        prefix = self.expand_name('{endpoint_name}.')
        ca_available = self.root_ca_cert
        ca_changed = ca_available and data_changed(prefix + 'ca',
                                                   self.root_ca_cert)
        server_available = self.server_certs
        server_changed = server_available and data_changed(
            prefix + 'servers', self.server_certs)
        client_available = self.client_certs
        client_changed = client_available and data_changed(
            prefix + 'clients', self.client_certs)
        certs_available = server_available or client_available
        certs_changed = server_changed or client_changed

        set_flag(prefix + 'available')
        toggle_flag(prefix + 'ca.available', ca_available)
        toggle_flag(prefix + 'ca.changed', ca_changed)
        toggle_flag(prefix + 'server.certs.available', server_available)
        toggle_flag(prefix + 'server.certs.changed', server_changed)
        toggle_flag(prefix + 'client.certs.available', client_available)
        toggle_flag(prefix + 'client.certs.changed', client_changed)
        toggle_flag(prefix + 'certs.available', certs_available)
        toggle_flag(prefix + 'certs.changed', certs_changed)
        # deprecated
        toggle_flag(prefix + 'server.cert.available', self.server_certs)
        toggle_flag(prefix + 'client.cert.available', self.get_client_cert())
        toggle_flag(prefix + 'batch.cert.available', self.server_certs)
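Every example in this listing leans on data_changed from charms.reactive, which caches a hash of the serialized value in the unit's key/value store and returns True on the first call for a key or whenever the value differs from what was cached. A minimal sketch of that pattern, with purely illustrative key and flag names:

from charms.reactive import data_changed, set_flag


def publish_if_changed(config):
    # data_changed returns True on the first call for this key and whenever
    # `config` no longer matches the value cached in unitdata.
    if data_changed('example.config', config):
        set_flag('example.config.changed')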
Example #2
def join_or_update_cohorts():
    """Join or update a cohort snapshot.

    All units of this application (leader and followers) need to refresh their
    installed snaps to the current cohort snapshot.
    """
    kube_control = endpoint_from_flag("kube-control.cohort_keys.available")
    cohort_keys = kube_control.cohort_keys
    for snapname in cohort_snaps:
        hookenv.status_set("maintenance", "Joining cohort for {}.".format(snapname))
        cohort_key = cohort_keys[snapname]
        for delay in (5, 30, 60):
            try:
                snap.join_cohort_snapshot(snapname, cohort_key)
                hookenv.log("Joined cohort for {}".format(snapname))
                break
            except subprocess.CalledProcessError:
                hookenv.log(
                    "Error joining cohort for {}".format(snapname), level=hookenv.ERROR
                )
                hookenv.status_set(
                    "maintenance",
                    "Error joining cohort for {} (see logs), "
                    "will retry.".format(snapname),
                )
                time.sleep(delay)
        else:
            set_flag("kubernetes-worker.cohorts.failed")
            return
    # Update our cache of the cohort keys, now that they're successfully applied.
    data_changed("master-cohorts", cohort_keys)
    set_flag("kubernetes-worker.cohorts.joined")
    clear_flag("kubernetes-worker.cohorts.failed")
Example #3
def watch_for_changes():
    """Watch for configuration changes and signal if we need to restart the
    worker services"""
    kube_control = endpoint_from_flag("kube-control.dns.available")
    container_runtime = endpoint_from_flag("endpoint.container-runtime.available")

    servers = get_kube_api_servers()
    dns = kube_control.get_dns()
    cluster_cidr = kubernetes_common.cluster_cidr()
    container_runtime_name = container_runtime.get_runtime()
    container_runtime_socket = container_runtime.get_socket()
    container_runtime_nvidia = container_runtime.get_nvidia_enabled()

    if container_runtime_nvidia:
        set_state("nvidia.ready")
    else:
        remove_state("nvidia.ready")

    if (
        data_changed("kube-api-servers", servers)
        or data_changed("kube-dns", dns)
        or data_changed("cluster-cidr", cluster_cidr)
        or data_changed("container-runtime", container_runtime_name)
        or data_changed("container-socket", container_runtime_socket)
    ):
        set_state("kubernetes-worker.restart-needed")
Example #4
    def set_cert(self, cert, key):
        rel = self._unit.relation
        if self._is_top_level_server_cert:
            # backwards compatibility; if this is the cert that was requested
            # as a single server cert, set it in the response as the single
            # server cert
            rel.to_publish_raw.update({
                self._server_cert_key: cert,
                self._server_key_key: key,
            })
        else:
            data = rel.to_publish.get(self._publish_key, {})
            data[self.common_name] = {
                'cert': cert,
                'key': key,
            }
            rel.to_publish[self._publish_key] = data
        if not rel.endpoint.new_server_requests:
            clear_flag(
                rel.endpoint.expand_name('{endpoint_name}.server'
                                         '.cert.requested'))
        if not rel.endpoint.new_requests:
            clear_flag(
                rel.endpoint.expand_name('{endpoint_name}.'
                                         'certs.requested'))
        data_changed(self._key, sorted(set(self.sans or [])))
Example #5
def install_etcd_credentials():
    etcd = endpoint_from_flag('etcd.available')
    etcd.save_client_credentials(ETCD_KEY_PATH, ETCD_CERT_PATH, ETCD_CA_PATH)
    # register initial etcd data so that we can detect changes
    data_changed('calico.etcd.data',
                 (etcd.get_connection_string(), etcd.get_client_credentials()))
    set_state('calico.etcd-credentials.installed')
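Example #5 seeds data_changed with the current etcd connection data so that a later handler (Example #13 below) can tell when that data changes. Stripped to its essentials, the seed/check pairing looks roughly like this; the key and helper names are placeholders:

from charms.reactive import data_changed


def install_credentials(save, connection_data):
    """Seed the cache at install time so the check below stays quiet."""
    save(connection_data)
    data_changed('example.etcd.data', connection_data)


def check_credentials(save, connection_data):
    """Re-save the credentials only when the connection data has changed."""
    if data_changed('example.etcd.data', connection_data):
        save(connection_data)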
Example #6
    def handle_remote_config(self):
        """
        Keep track of received data so we can know if it changes.

        :return: None
        """
        clear_flag(self.expand_name('endpoint.{endpoint_name}.reconfigure'))
        # Presently, we only care about one piece of remote config. Expand
        # the list as needed.
        data_changed('containerd.remote_config', [self.get_sandbox_image()])
Example #7
def update_status_info():
    endpoint = endpoint_from_flag('endpoint.kubernetes-deployer.available')
    status = check_predefined_resources()
    error_states = unitdata.kv().get('error-states', {})
    status.update(error_states)
    worker_ips = get_worker_node_ips()
    # Only report if the status has changed
    if (data_changed('status-info', status)
            or data_changed('worker-ips', worker_ips)):
        endpoint.send_status(status)
        endpoint.send_worker_ips(worker_ips)
Example #8
def process_snapd_timer():
    """
    Set the snapd refresh timer on the leader so all cluster members
    (present and future) will refresh near the same time.

    :return: None
    """
    # Get the current snapd refresh timer; we know layer-snap has set this
    # when the 'snap.refresh.set' flag is present.
    timer = snap.get(snapname="core",
                     key="refresh.timer").decode("utf-8").strip()
    if not timer:
        # The core snap timer is empty. This likely means a subordinate timer
        # reset ours. Try to set it back to a previously leader-set value,
        # falling back to config if needed. Luckily, this should only happen
        # during subordinate install, so this should remain stable afterward.
        timer = leader_get("snapd_refresh") or hookenv.config("snapd_refresh")
        snap.set_refresh_timer(timer)

        # Ensure we have the timer known by snapd (it may differ from config).
        timer = snap.get(snapname="core",
                         key="refresh.timer").decode("utf-8").strip()

    # The first time through, data_changed will be true. Subsequent calls
    # should only update leader data if something changed.
    if data_changed("snapd_refresh", timer):
        hookenv.log("setting leader snapd_refresh timer to: {}".format(timer))
        leader_set({"snapd_refresh": timer})
Example #9
    def _handle_changed(self):
        set_flag(self.expand_name("{endpoint_name}.connected"))
        if self.connection_string():
            set_flag(self.expand_name("{endpoint_name}.available"))
            data_key = self.expand_name("endpoint.{endpoint_name}.data")
            if data_changed(data_key, self.connection_string()):
                set_flag(self.expand_name("{endpoint_name}.changed"))
Example #10
def _get_secret_id(vault):
    token = vault.unit_token
    if data_changed("layer.vault-kv.token", token):
        log("Changed unit_token, getting new secret_id")
        # token is one-shot, but if it changes it might mean that we're
        # being told to rotate the secret ID, or we might not have fetched
        # one yet
        vault_url = vault.vault_url
        try:
            secret_id = retrieve_secret_id(vault_url, token)
        except (
                requests.exceptions.ConnectionError,
                hvac.exceptions.VaultDown,
                hvac.exceptions.VaultNotInitialized,
                hvac.exceptions.BadGateway,
        ) as e:
            raise VaultNotReady() from e
        unitdata.kv().set("layer.vault-kv.secret_id", secret_id)
        # have to flush immediately because if we don't and hit some error
        # elsewhere, it could get us into a state where we have forgotten the
        # secret ID and can't retrieve it again because we've already used the
        # token
        unitdata.kv().flush()
    else:
        secret_id = unitdata.kv().get("layer.vault-kv.secret_id")
    return secret_id
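The flush() in Example #10 matters: as the original comment notes, the Vault token is one-shot, and unitdata is normally only committed at the end of a successful hook, so an error later in the hook could discard the secret ID after the token has already been spent. Reduced to a sketch (the key name is illustrative):

from charmhelpers.core import unitdata


def store_secret_id(secret_id):
    kv = unitdata.kv()
    kv.set('example.secret-id', secret_id)
    # Persist immediately rather than waiting for the end-of-hook commit,
    # so a later failure in this hook cannot lose the value.
    kv.flush()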
Example #11
def check_config_changed():
    try:
        config = vault_kv.get_vault_config()
    except vault_kv.VaultNotReady:
        return
    else:
        if data_changed("layer.vault-kv.config", config):
            set_flag("layer.vault-kv.config.changed")
Example #12
    def manage_flags(self):
        toggle_flag(self.expand_name('{endpoint_name}.connected'),
                    self.is_joined)
        toggle_flag(self.expand_name('{endpoint_name}.gpu.available'),
                    self.is_joined and self._get_gpu())
        requests_data_id = self.expand_name('{endpoint_name}.requests')
        requests = self.auth_user()
        if data_changed(requests_data_id, requests):
            set_flag(self.expand_name('{endpoint_name}.requests.changed'))
Example #13
def check_etcd_changes():
    etcd = endpoint_from_flag('etcd.available')
    if data_changed(
            'calico.etcd.data',
            (etcd.get_connection_string(), etcd.get_client_credentials())):
        etcd.save_client_credentials(ETCD_KEY_PATH, ETCD_CERT_PATH,
                                     ETCD_CA_PATH)
        remove_state('calico.service.installed')
        remove_state('calico.npc.deployed')
Example #14
    def _changed(self):
        # Set the master/standby changed flags. The charm is
        # responsible for clearing this, if it cares. Flags are
        # cleared before being set to ensure triggers are triggered.
        upgrade = hookenv.hook_name() == 'upgrade-charm'
        self._reset_all_flags()
        key = self.expand_name('endpoint.{endpoint_name}.master.changed')
        if data_changed(key, [str(cs.master) for cs in self]) or (self.master and upgrade):
            self._clear_flag('{endpoint_name}.master.changed')
            self._set_flag('{endpoint_name}.master.changed')
            self._clear_flag('{endpoint_name}.database.changed')
            self._set_flag('{endpoint_name}.database.changed')
        key = self.expand_name('endpoint.{endpoint_name}.standbys.changed')
        if data_changed(key, [sorted(str(s) for s in cs.standbys) for cs in self]) or (self.standbys and upgrade):
            self._clear_flag('{endpoint_name}.standbys.changed')
            self._set_flag('{endpoint_name}.standbys.changed')
            self._clear_flag('{endpoint_name}.database.changed')
            self._set_flag('{endpoint_name}.database.changed')
        self._clear_flag('endpoint.{endpoint_name}.changed')
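The "cleared before being set to ensure triggers are triggered" comment in Example #14 refers to charms.reactive triggers, which react to a flag being set; clearing first guarantees a fresh set transition even if the flag was already raised. A hypothetical illustration (the flag names are not from the real interface):

from charms.reactive import clear_flag, register_trigger, set_flag

# Whenever db.master.changed is (re)set, drop the flag that says our
# config is up to date.
register_trigger(when='db.master.changed', clear_flag='db.config.rendered')

# Clearing before setting produces a clean set transition, so the trigger
# above fires even if db.master.changed was already set.
clear_flag('db.master.changed')
set_flag('db.master.changed')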
Example #15
def catch_change_in_creds(kube_control):
    """Request a service restart in case credential updates were detected."""
    nodeuser = "******".format(get_node_name().lower())
    creds = kube_control.get_auth_credentials(nodeuser)
    if creds and creds["user"] == nodeuser:
        # We need to cache the credentials here because if the control-plane
        # leader dies and is replaced by a new one, the new control-plane
        # will have no recollection of our certs.
        db.set("credentials", creds)
        set_state("worker.auth.bootstrapped")
        if data_changed("kube-control.creds", creds):
            set_state("kubernetes-worker.restart-needed")
Example #16
def start_worker():
    """Start kubelet using the provided API and DNS info."""
    # Note that the DNS server doesn't necessarily exist at this point. We know
    # what its IP will eventually be, though, so we can go ahead and configure
    # kubelet with that info. This ensures that early pods are configured with
    # the correct DNS even though the server isn't ready yet.
    kube_control = endpoint_from_flag("kube-control.dns.available")

    servers = get_kube_api_servers()
    dns = kube_control.get_dns()
    dns_domain = dns["domain"]
    dns_ip = dns["sdn-ip"]
    registry = get_registry_location()
    cluster_cidr = kubernetes_common.cluster_cidr()

    if cluster_cidr is None:
        hookenv.log("Waiting for cluster cidr.")
        return

    if not servers:
        hookenv.log("Waiting for API server URL")
        return

    if kubernetes_common.is_ipv6(cluster_cidr):
        kubernetes_common.enable_ipv6_forwarding()

    creds = db.get("credentials")
    data_changed("kube-control.creds", creds)

    create_config(servers[get_unit_number() % len(servers)], creds)
    configure_default_cni(kube_control.get_default_cni())
    configure_kubelet(dns_domain, dns_ip, registry, has_xcp=kube_control.has_xcp)
    configure_kube_proxy(configure_prefix, servers, cluster_cidr)
    set_state("kubernetes-worker.config.created")
    restart_unit_services()
    update_kubelet_status()
    set_state("kubernetes-worker.label-config-required")
    set_state("nrpe-external-master.reconfigure")
    remove_state("kubernetes-worker.restart-needed")
    remove_state("endpoint.kube-control.has-xcp.changed")
Example #17
    def _changed(self):
        # Set the master/standby changed flags. The charm is
        # responsible for clearing this, if it cares. Flags are
        # cleared before being set to ensure triggers are triggered.
        upgrade = hookenv.hook_name() == 'upgrade-charm'
        self._reset_all_flags()
        key = self.expand_name('endpoint.{endpoint_name}.master.changed')
        if data_changed(key, [str(cs.master)
                              for cs in self]) or (self.master and upgrade):
            self._clear_flag('{endpoint_name}.master.changed')
            self._set_flag('{endpoint_name}.master.changed')
            self._clear_flag('{endpoint_name}.database.changed')
            self._set_flag('{endpoint_name}.database.changed')
        key = self.expand_name('endpoint.{endpoint_name}.standbys.changed')
        if data_changed(key,
                        [sorted(str(s) for s in cs.standbys)
                         for cs in self]) or (self.standbys and upgrade):
            self._clear_flag('{endpoint_name}.standbys.changed')
            self._set_flag('{endpoint_name}.standbys.changed')
            self._clear_flag('{endpoint_name}.database.changed')
            self._set_flag('{endpoint_name}.database.changed')
        self._clear_flag('endpoint.{endpoint_name}.changed')
Example #18
    def set_cert(self, cert, key):
        """Send the cert and key to all units of the application

        :param cert: TLS Certificate
        :type cert: str
        :param key: TLS Private Key
        :type key: str
        """
        rel = self._unit.relation
        for unit in self._unit.relation.units:
            pub_key = self.derive_publish_key(unit=unit)
            data = rel.to_publish.get(pub_key, {})
            data['app_data'] = {
                'cert': cert,
                'key': key,
            }
            rel.to_publish[pub_key] = data
        if not rel.endpoint.new_application_requests:
            clear_flag(
                rel.endpoint.expand_name(
                    '{endpoint_name}.application.certs.requested'))
        data_changed(self._key, sorted(set(self.sans or [])))
Example #19
    def is_changed(self):
        """
        Whether or not the request for this instance has changed.
        """
        return data_changed(self.expand_name('all-data'), [
            self.vsphere_ip,
            self.user,
            self.password,
            self.datacenter,
            self.datastore,
            self.folder,
            self.respool_path,
        ])
Example #20
def get_model():
    is_resource = True
    model = None
    # model = layer.kubeflow_tf_serving.get_model_from_resource()
    if not model:
        is_resource = False
        model = layer.kubeflow_tf_serving.get_model_from_config()
    if model:
        if data_changed('charm.kubeflow-tf-serving.model', model):
            set_flag('charm.kubeflow-tf-serving.has-model')
            clear_flag('charm.kubeflow-tf-serving.started')
            unitdata.kv().set('charm.kf-tf-serving.model', model)
            unitdata.kv().set('charm.kf-tf-serving.is-resource', is_resource)
    else:
        clear_flag('charm.kubeflow-tf-serving.has-model')
        unitdata.kv().unset('charm.kubeflow-tf-serving.model')
Example #21
    def is_changed(self):
        """
        Whether or not the request for this instance has changed.
        """
        return data_changed(self.expand_name('all-data'), [
            self.auth_url,
            self.region,
            self.username,
            self.password,
            self.user_domain_name,
            self.project_domain_name,
            self.project_name,
            self.endpoint_tls_ca,
            self.subnet_id,
            self.floating_network_id,
            self.lb_method,
            self.manage_security_groups,
        ])
Example #22
def process_snapd_timer():
    ''' Set the snapd refresh timer on the leader so all cluster members
    (present and future) will refresh near the same time. '''
    # Get the current snapd refresh timer; we know layer-snap has set this
    # when the 'snap.refresh.set' flag is present.
    timer = snap.get(snapname='core', key='refresh.timer').decode('utf-8')
    if not timer:
        # A subordinate wiped out our value, so we need to force it to be set
        # again. Luckily, the subordinate should only wipe it out once, on
        # first install, so this should remain stable afterward.
        snap.set_refresh_timer(hookenv.config('snapd_refresh'))
        timer = snap.get(snapname='core', key='refresh.timer').decode('utf-8')

    # The first time through, data_changed will be true. Subsequent calls
    # should only update leader data if something changed.
    if data_changed('snapd_refresh', timer):
        hookenv.log('setting leader snapd_refresh timer to: {}'.format(timer))
        leadership.leader_set({'snapd_refresh': timer})
Example #23
def _fetch():
    should_set_status = layer.options.get('docker-resource', 'set-status')
    queue = unitdata.kv().get('layer.docker-resource.pending', [])
    failed = []
    for res_name in queue:
        prefix = 'layer.docker-resource.{}'.format(res_name)
        if should_set_status:
            layer.status.maintenance('fetching resource: {}'.format(res_name))
        try:
            image_info_filename = hookenv.resource_get(res_name)
            if not image_info_filename:
                raise ValueError('no filename returned')
            image_info = yaml.safe_load(Path(image_info_filename).read_text())
            if not image_info:
                raise ValueError('no data returned')
        except Exception as e:
            hookenv.log(
                'unable to fetch docker resource {}: {}'.format(res_name, e),
                level=hookenv.ERROR)
            failed.append(res_name)
            set_flag('{}.failed'.format(prefix))
            clear_flag('{}.available'.format(prefix))
            clear_flag('{}.changed'.format(prefix))
        else:
            unitdata.kv().set('{}.image-info'.format(prefix), image_info)
            was_available = is_flag_set('{}.available'.format(prefix))
            is_changed = data_changed(prefix, image_info)
            set_flag('{}.available'.format(prefix))
            clear_flag('{}.failed'.format(prefix))
            toggle_flag('{}.changed'.format(prefix),
                        was_available and is_changed)
    if failed:
        if should_set_status:
            pl = 's' if len(failed) > 1 else ''
            layer.status.blocked(
                'unable to fetch resource{}: {}'.format(
                    pl, ', '.join(failed)
                )
            )
        unitdata.kv().set('layer.docker-resource.pending', failed)
        set_flag('layer.docker-resource.pending')
    else:
        unitdata.kv().set('layer.docker-resource.pending', [])
        clear_flag('layer.docker-resource.pending')
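Because data_changed reports True the first time it sees a key, Example #23 pairs it with a was_available check so the .changed flag is only raised on genuine updates, not on the initial fetch. The same guard in isolation, with placeholder flag names:

from charms.reactive import data_changed, is_flag_set, set_flag, toggle_flag


def record_fetch(image_info):
    was_available = is_flag_set('example.resource.available')
    is_changed = data_changed('example.resource', image_info)
    set_flag('example.resource.available')
    # Only signal a change if the resource had been fetched before; the
    # first call to data_changed for a key always returns True.
    toggle_flag('example.resource.changed', was_available and is_changed)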
Example #24
def nfs_state_control(mount):
    """Determine if we should remove the state that controls the re-render
    and execution of the nfs-relation-changed event because there
    are changes in the relationship data, and we should re-render any
    configs"""

    mount_data = get_first_mount(mount)
    if mount_data:
        nfs_relation_data = {
            "options": mount_data["options"],
            "host": mount_data["hostname"],
            "mountpoint": mount_data["mountpoint"],
            "fstype": mount_data["fstype"],
        }

        # Re-execute the rendering if the data has changed.
        if data_changed("nfs-config", nfs_relation_data):
            hookenv.log("reconfiguring nfs")
            remove_state("nfs.configured")
Example #25
def update_registry_location():
    """Handle changes to the container image registry.

    Monitor the image registry location. If it changes, manage flags to ensure
    our image-related handlers will be invoked with an accurate registry.
    """
    registry_location = get_registry_location()

    if registry_location:
        runtime = endpoint_from_flag("endpoint.container-runtime.available")
        if runtime:
            # Construct and send the sandbox image (pause container) to our runtime
            uri = get_sandbox_image_uri(registry_location)
            runtime.set_config(sandbox_image=uri)

    if data_changed("registry-location", registry_location):
        remove_state("kubernetes-worker.config.created")
        remove_state("kubernetes-worker.ingress.available")
        remove_state("nfs.configured")
        set_state("kubernetes-worker.restart-needed")
Example #26
def ensure_package_status():
    '''Hold or unhold packages per the package_status configuration option.

    All packages installed using this module and handlers are affected.

    A mechanism may be added in the future to override this for a
    subset of installed packages.
    '''
    packages = installed()
    if not packages:
        return
    config = hookenv.config()
    package_status = config.get('package_status') or ''
    changed = reactive.data_changed('apt.package_status',
                                    (package_status, sorted(packages)))
    if changed:
        if package_status == 'hold':
            hookenv.log('Holding packages {}'.format(','.join(packages)))
            fetch.apt_hold(packages)
        else:
            hookenv.log('Unholding packages {}'.format(','.join(packages)))
            fetch.apt_unhold(packages)
    reactive.clear_flag('apt.needs_hold')
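Several examples, including #26 above, sort lists before handing them to data_changed. The helper compares a hash of the serialized value, so a mere reordering of an otherwise identical list would register as a change; normalizing the value first avoids spurious "changed" events. A small sketch with hypothetical values:

from charms.reactive import data_changed


def package_status_changed(status, packages):
    # Without sorted(), ['nginx', 'haproxy'] and ['haproxy', 'nginx'] would
    # hash differently and be reported as a change.
    return data_changed('example.package_status', (status, sorted(packages)))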