def upgrade():
    """An upgrade has been triggered."""
    pki_directory = os.path.join(easyrsa_directory, "pki")
    if os.path.isdir(pki_directory):
        # specific handling if the upgrade is from a previous version
        # where certificate_authority_serial is not set at install
        serial_file = "serial"
        with chdir(pki_directory):
            # if the ca and ca_key are set and serial is not,
            # set this to serial in the pki directory
            if (os.path.isfile(serial_file)
                    and leader_get("certificate_authority")
                    and leader_get("certificate_authority_key")
                    and not leader_get("certificate_authority_serial")):
                with open(serial_file, "r") as stream:
                    ca_serial = stream.read()
                # set the previously unset certificate authority serial
                leader_set({"certificate_authority_serial": ca_serial})

        charm_pki_directory = os.path.join(charm_directory, "pki")
        # When the charm pki directory exists, it is stale, remove it.
        if os.path.isdir(charm_pki_directory):
            shutil.rmtree(charm_pki_directory)
        # Copy the EasyRSA/pki to the charm pki directory.
        shutil.copytree(pki_directory, charm_pki_directory, symlinks=True)
    clear_flag("easyrsa.installed")
    clear_flag("easyrsa.configured")

def prepare_end_user_package():
    """Prepare the tarball package for clients to use to connect to the
    swarm cluster using the default client credentials."""
    # If we are a follower, we don't have keys and need to fetch them
    # from leader data, which triggered `leadership.set.client_cert`,
    # so they had better be there!
    if not path.exists('swarm_credentials'):
        makedirs('swarm_credentials')
        with open('swarm_credentials/key.pem', 'w+') as fp:
            fp.write(leader_get('client_key'))
        with open('swarm_credentials/cert.pem', 'w+') as fp:
            fp.write(leader_get('client_cert'))
        with open('swarm_credentials/ca.pem', 'w+') as fp:
            fp.write(leader_get('certificate_authority'))

    # Render the client package script.
    template_vars = {'public_address': unit_get('public-address')}
    render('enable.sh', './swarm_credentials/enable.sh', template_vars)

    # Clear out any stale credentials package (note: the archive created
    # below is 'swarm_credentials.tar.gz', so check for that name).
    if path.exists('swarm_credentials.tar.gz'):
        remove('swarm_credentials.tar.gz')

    cmd = 'tar cvfz swarm_credentials.tar.gz swarm_credentials'
    subprocess.check_call(split(cmd))
    copyfile('swarm_credentials.tar.gz',
             '/home/ubuntu/swarm_credentials.tar.gz')
    set_state('client.credentials.placed')

def write_cert_secret():
    '''Write the returned certificate into a secret for the webhook. This
    data is also shared across leadership data to other units.'''
    hookenv.status_set('maintenance', 'Writing certificates')
    cert = leader_get('cert').encode('utf-8')
    key = leader_get('key').encode('utf-8')
    context = {}
    context['namespace'] = namespace
    context['cert'] = base64.b64encode(cert).decode('utf-8')
    context['key'] = base64.b64encode(key).decode('utf-8')

    render('certs.yaml', secret_yaml, context)
    hookenv.log('Updating AWS-IAM secret.')
    try:
        _kubectl('apply', '-f', secret_yaml)
    except CalledProcessError as e:
        hookenv.log(e)
        hookenv.log('Failed to create AWS_IAM secret. '
                    'Will attempt again next update.')  # noqa
        return
    set_flag('charm.aws-iam.certificate-written')

def render_slave_config():
    render('carte-config/slave.xml.j2', '/home/etl/carte-config.xml', {
        'carteslaveport': leader_get('port'),
        'carteslavehostname': hookenv.unit_private_ip(),
        'cartemasterhostname': leader_get('hostname'),
        'carteslavepassword': leader_get('password'),
        'cartemasterpassword': leader_get('password'),
        'cartemasterport': leader_get('port')
    })

def send_ca(tls):
    '''The client relationship has been established; read the CA and client
    certificate from leadership data and set them on the relationship
    object.'''
    certificate_authority = leader_get('certificate_authority')
    tls.set_ca(certificate_authority)
    # The client cert and key should be the same for all connections.
    client_cert = leader_get('client_certificate')
    client_key = leader_get('client_key')
    # Set the client certificate and key on the relationship object.
    tls.set_client_cert(client_cert, client_key)

def write_webhook_yaml():
    '''Write out the webhook yaml file for the api server to use. Everyone,
    including the leader, does this with leadership data set by the
    leader.'''
    hookenv.status_set('maintenance',
                       'Writing apiserver webhook configuration')
    context = {}
    cert = leader_get('cert').encode('utf-8')
    context['cert'] = base64.b64encode(cert).decode('utf-8')
    context['service_ip'] = leader_get('service_ip')
    render('webhook.yaml', webhook_path, context)
    aws_iam = endpoint_from_flag('endpoint.aws-iam.available')
    aws_iam.set_webhook_status(True)
    set_flag('charm.aws-iam.written-webhook')

def setSlaveProperties():
    cid = leader_get('cluster')
    ats = leader_get('authtoken')
    directory = '/var/lib/dcos'
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open('/var/lib/dcos/cluster-id', 'w') as f:
        f.write(cid)
    with open('/var/lib/dcos/auth-token-secret', 'w') as f:
        f.write(ats)
    startDCOS()
    set_state('dcos-master.running')
    status_set('active', 'DC/OS started')

def new_munge_consumer(self):
    remote_unit = hookenv.remote_unit()
    if remote_unit:
        mk = leadership.leader_get('munge_key')
        hookenv.log(
            'new_munge_consumer(): join event from %s, publishing key: %s'
            % (remote_unit, mk))

def process_snapd_timer():
    """
    Set the snapd refresh timer on the leader so all cluster members
    (present and future) will refresh near the same time.

    :return: None
    """
    # Get the current snapd refresh timer; we know layer-snap has set this
    # when the 'snap.refresh.set' flag is present.
    timer = snap.get(snapname="core", key="refresh.timer").decode("utf-8").strip()
    if not timer:
        # The core snap timer is empty. This likely means a subordinate timer
        # reset ours. Try to set it back to a previously leader-set value,
        # falling back to config if needed. Luckily, this should only happen
        # during subordinate install, so this should remain stable afterward.
        timer = leader_get("snapd_refresh") or hookenv.config("snapd_refresh")
        snap.set_refresh_timer(timer)

    # Ensure we have the timer known by snapd (it may differ from config).
    timer = snap.get(snapname="core", key="refresh.timer").decode("utf-8").strip()

    # The first time through, data_changed will be true. Subsequent calls
    # should only update leader data if something changed.
    if data_changed("snapd_refresh", timer):
        hookenv.log("setting leader snapd_refresh timer to: {}".format(timer))
        leader_set({"snapd_refresh": timer})

def write_config_file():
    cc = hookenv.config()
    lc = leadership.leader_get()
    config = {
        "api-macaroon-timeout": cc["api-macaroon-timeout"],
        "discharge-macaroon-timeout": cc["discharge-macaroon-timeout"],
        "discharge-token-timeout": cc["discharge-token-timeout"],
        "enable-email-login": cc["enable-email-login"],
        "logging-config": cc["logging-config"],
        "private-addr": hookenv.unit_private_ip(),
        "rendezvous-timeout": cc["rendezvous-timeout"],
        "skip-location-for-cookie-paths": cc["skip-location-for-cookie-paths"],
    }
    if cc["admin-agent-public-key"]:
        config["admin-agent-public-key"] = cc["admin-agent-public-key"]
    if cc["http-proxy"]:
        config["http-proxy"] = cc["http-proxy"]
        # extend no-proxy to include all candid units.
        no_proxy = [cc["no-proxy"]]
        if not no_proxy[0]:
            no_proxy = no_proxy[1:]
        ep = endpoint_from_flag('candid.connected')
        if ep:
            no_proxy.extend(ep.addresses)
        config["no-proxy"] = ",".join(no_proxy)
    if cc["identity-providers"]:
        try:
            config["identity-providers"] = \
                candid.parse_identity_providers(cc["identity-providers"])
        except candid.IdentityProvidersParseError as e:
            hookenv.log("invalid identity providers: {}".format(e),
                        level="error")
    if cc["location"]:
        config["location"] = cc["location"]
    if cc["private-key"]:
        config["private-key"] = cc["private-key"]
    elif lc.get("private-key"):
        config["private-key"] = lc["private-key"]
    if cc["public-key"]:
        config["public-key"] = cc["public-key"]
    elif lc.get("public-key"):
        config["public-key"] = lc["public-key"]
    if cc["redirect-login-trusted-urls"]:
        config["redirect-login-trusted-urls"] = \
            _parse_list(cc["redirect-login-trusted-urls"])
    if cc["redirect-login-trusted-domains"]:
        config["redirect-login-trusted-domains"] = \
            _parse_list(cc["redirect-login-trusted-domains"])
    pg = endpoint_from_flag('postgres.master.available')
    if pg:
        config["storage"] = {
            "type": "postgres",
            "connection-string": str(pg.master),
        }
    else:
        config["storage"] = {"type": "memory"}
    candid.update_config(CONFIG_FILE, config)
    set_flag('candid.configured')
    set_flag('candid.restart')

def enable_client_tls():
    """
    Copy the TLS certificates in place and generate mount points for the
    swarm manager to mount the certs. This enables client-side TLS security
    on the TCP service.
    """
    if not path.exists("/etc/docker"):
        makedirs("/etc/docker")

    kv = unitdata.kv()
    cert = kv.get("tls.server.certificate")
    with open("/etc/docker/server.pem", "w+") as f:
        f.write(cert)
    with open("/etc/docker/ca.pem", "w+") as f:
        f.write(leader_get("certificate_authority"))

    # Shenanigans: the server key may be stored under the unit name or the
    # public address, depending on how it was generated.
    keypath = "easy-rsa/easyrsa3/pki/private/{}.key"
    server = getenv("JUJU_UNIT_NAME").replace("/", "_")

    if path.exists(keypath.format(server)):
        copyfile(keypath.format(server), "/etc/docker/server-key.pem")
    else:
        copyfile(keypath.format(unit_get("public-address")),
                 "/etc/docker/server-key.pem")

    opts = DockerOpts()
    config_dir = "/etc/docker"
    cert_path = "{}/server.pem".format(config_dir)
    ca_path = "{}/ca.pem".format(config_dir)
    key_path = "{}/server-key.pem".format(config_dir)
    opts.add("tlscert", cert_path)
    opts.add("tlscacert", ca_path)
    opts.add("tlskey", key_path)
    opts.add("tlsverify", None)
    render("docker.defaults", "/etc/default/docker", {"opts": opts.to_s()})

def controller_ip_port_list(cls):
    """List of ip:port pairs for Amphorae instances health reporting.

    The list is built based on information from individual Octavia units
    coordinated, stored and shared among all units through leader storage.

    :param cls: charms_openstack.adapters.ConfigurationAdapter derived class
                instance.  Charm class instance is at cls.charm_instance.
    :type cls: charms_openstack.adapters.ConfigurationAdapter
    :returns: Comma separated list of ip:port pairs.
    :rtype: str
    """
    # -> contrail addition
    return ch_net_ip.get_host_ip(
        ch_core.hookenv.network_get("public")["ingress-addresses"][0]
    ) + ':' + OCTAVIA_HEALTH_LISTEN_PORT
    # <- contrail addition
    # NOTE: the code below is unreachable while the contrail addition above
    # returns early.
    try:
        ip_list = json.loads(
            leadership.leader_get('controller-ip-port-list'))
    except TypeError:
        return
    if ip_list:
        port_suffix = ':' + OCTAVIA_HEALTH_LISTEN_PORT
        return (port_suffix + ', ').join(sorted(ip_list)) + port_suffix

def return_secrets(secrets=None):
    """Return the secrets dict."""
    conf = config()

    if secrets:
        secrets_mod = secrets
    else:
        secrets_mod = {}

    secrets_mod['redis_host'] = kv.get('redis_host')
    secrets_mod['redis_port'] = kv.get('redis_port')
    secrets_mod['postgresql_host'] = kv.get('postgresql_host')
    secrets_mod['postgresql_port'] = kv.get('postgresql_port')
    secrets_mod['postgresql_user'] = kv.get('postgresql_user')
    secrets_mod['postgresql_password'] = kv.get('postgresql_password')
    secrets_mod['postgresql_dbname'] = kv.get('postgresql_dbname')
    secrets_mod['system_secret_key'] = leader_get('system_secret_key')

    if conf.get('aws-key'):
        secrets_mod['AWS_KEY'] = conf.get('aws-key')
    if conf.get('aws-secret'):
        secrets_mod['AWS_SECRET'] = conf.get('aws-secret')
    if conf.get('aws-region'):
        secrets_mod['AWS_REGION'] = conf.get('aws-region')

    if conf.get('secrets', ''):
        secrets_from_config = conf.get('secrets').strip().split(",")
        for secret in secrets_from_config:
            # Split on the first '=' only, so values may contain '='.
            key, value = secret.split("=", 1)
            secrets_mod[key] = value

    return secrets_mod

def get_seed_ips():
    '''Return the set of seed ip addresses.

    We use ip addresses rather than unit names, as we may need to use
    external seed ips at some point.
    '''
    return set((leadership.leader_get('seeds') or '').split(','))

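# A minimal leader-side counterpart to get_seed_ips() above, shown as a
# hedged sketch: the helper name `set_seed_ips` and the choice to sort the
# addresses are assumptions for illustration, not part of the original
# charm; only leadership.leader_set() is an existing API here.
def set_seed_ips(seed_ips):
    '''Store the set of seed ip addresses in leader storage (sketch).'''
    # Leader storage only holds strings, so serialize the set as the
    # comma-separated list that get_seed_ips() splits back apart.
    leadership.leader_set({'seeds': ','.join(sorted(seed_ips))})
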
def db_monitor_respond():
    """Respond to db-monitor relation changed."""
    ch_core.hookenv.log("db-monitor connected", ch_core.hookenv.DEBUG)
    db_monitor = reactive.endpoint_from_flag("db-monitor.connected")

    # The related application name doubles as the username.
    username = related_app = ch_core.hookenv.remote_service_name()

    # Get or create the db-monitor user password.
    db_monitor_stored_passwd_key = "db-monitor.{}.passwd".format(related_app)
    password = leadership.leader_get(db_monitor_stored_passwd_key)
    if not password:
        password = ch_core.host.pwgen()
        leadership.leader_set({db_monitor_stored_passwd_key: password})

    # Provide relation data.
    with charm.provide_charm_instance() as instance:
        # NOTE (rgildein): Create a custom user with administrator
        # privileges, but read-only access.
        if not instance.create_cluster_user(db_monitor.relation_ip,
                                            username, password, True):
            ch_core.hookenv.log("db-monitor user was not created.",
                                ch_core.hookenv.WARNING)
            return

        db_monitor.provide_access(
            port=instance.cluster_port,
            user=username,
            password=password,
        )

        instance.assess_status()

def get_keys_from_leader(keys, overwrite_local=False):
    """
    Get the broadcast keys from the leader and store them in the
    corresponding files.

    Args:
        keys: list of keys. Keys are actually files on the FS.

    Returns: True if all keys were fetched, False if not.
    """
    # This races with other codepaths, and seems to require being created
    # first. This block may be extracted later, but for now seems to work
    # as intended.
    os.makedirs('/root/cdk', exist_ok=True)

    for k in keys:
        # If the path does not exist, assume we need it.
        if not os.path.exists(k) or overwrite_local:
            # Fetch data from leadership broadcast.
            contents = leader_get(k)
            # Default to logging the warning and waiting for leader data
            # to be set.
            if contents is None:
                msg = "Waiting on leader's crypto keys."
                hookenv.status_set('waiting', msg)
                hookenv.log('Missing content for file {}'.format(k))
                return False
            # Write out the file and move on to the next item.
            with open(k, 'w+') as fp:
                fp.write(contents)
                fp.write('\n')

    return True

def reinstall_spark():
    spark_master_host = leadership.leader_get('master-fqdn')
    peers = []
    zks = []
    if is_state('zookeeper.ready'):
        # If ZK is available we are in HA. We do not want reconfigurations
        # if a leader fails; HA takes care of this.
        spark_master_host = ''
        zk = RelationBase.from_state('zookeeper.ready')
        zks = zk.zookeepers()
        # We need to reconfigure Spark when in HA and peers change;
        # ignore otherwise.
        peers = get_spark_peers()

    deployment_matrix = {
        'spark_master': spark_master_host,
        'yarn_ready': is_state('hadoop.yarn.ready'),
        'hdfs_ready': is_state('hadoop.hdfs.ready'),
        'zookeepers': zks,
        'peers': peers,
    }

    if not data_changed('deployment_matrix', deployment_matrix):
        return

    hookenv.status_set('maintenance', 'configuring spark')
    hadoop = (RelationBase.from_state('hadoop.yarn.ready') or
              RelationBase.from_state('hadoop.hdfs.ready'))
    if install_spark(hadoop, zks):
        if is_state('hadoop.yarn.ready'):
            set_deployment_mode_state('spark.yarn.installed')
        else:
            set_deployment_mode_state('spark.standalone.installed')

    report_status()

def install_spark_yarn():
    """
    Called in 'yarn-*' mode after Juju has elected a leader. The
    'hadoop.yarn.ready' state must be set.
    """
    hosts = {
        'spark-master': leadership.leader_get('master-fqdn'),
    }
    hadoop = (RelationBase.from_state('hadoop.yarn.ready') or
              RelationBase.from_state('hadoop.hdfs.ready'))
    rms = hadoop.resourcemanagers()
    hosts['resourcemanager'] = rms[0]

    # Probably don't need to check this since yarn.ready implies hdfs.ready
    # for us, but it doesn't hurt.
    if is_state('hadoop.hdfs.ready'):
        nns = hadoop.namenodes()
        hosts['namenode'] = nns[0]

    # Always include native hadoop libs in yarn mode; add cuda libs if
    # present.
    extra_libs = ["/usr/lib/hadoop/lib/native"]
    if is_state('cuda.installed'):
        extra_libs.append("/usr/local/cuda/lib64")

    spark = Spark()
    spark.configure(hosts, zk_units=None, peers=None, extra_libs=extra_libs)
    set_deployment_mode_state('spark.yarn.installed')

def end_admin_party(config_path='/etc/couchdb'):
    """
    Couch starts out in 'admin party' mode, which means that anyone can
    create and edit databases. This routine secures couch, and flags us to
    restart.

    @param str config_path: The location of the config files in the system.
    """
    log("Ending the admin party.", DEBUG)

    _maybe_generate_passwords()
    passwords = json.loads(leader_get('passwords'))

    entries = [
        {'section': 'admins', 'key': 'admin',
         'value': passwords['admin_pass']},
        {'section': 'admins', 'key': 'replication',
         'value': passwords['repl_pass']},
        {'section': 'couch_httpd_auth', 'key': 'require_valid_user',
         'value': 'true'},
    ]
    if config("human-auditable-creds"):
        entries += [
            {'section': 'juju_notes', 'key': 'admin_pass',
             'value': passwords['admin_pass']},
            {'section': 'juju_notes', 'key': 'repl_pass',
             'value': passwords['repl_pass']},
        ]

    _write_config(config_path, 'local', entries)
    remove_state('couchdb.admin_party')

def install_spark_standalone(zks, peers):
    """
    Called in local/standalone mode after Juju has elected a leader.
    """
    hosts = {
        'spark-master': leadership.leader_get('master-fqdn'),
    }

    # If zks have changed and we are not handling a departed spark peer,
    # give the ensemble time to settle. Otherwise we might try to start
    # spark master with data from the wrong zk leader. Doing so will cause
    # spark-master to shutdown:
    # https://issues.apache.org/jira/browse/SPARK-15544
    if (zks and data_changed('zks', zks) and
            not is_state('sparkpeers.departed')):
        hookenv.status_set('maintenance',
                           'waiting for zookeeper ensemble to settle')
        hookenv.log(
            "Waiting 2m to ensure zk ensemble has settled: {}".format(zks))
        time.sleep(120)

    # Let spark know if we have cuda libs installed.
    # NB: spark packages prereq hadoop (boo), so even in standalone mode,
    # we'll have hadoop libs installed. May as well include them in our
    # lib path.
    extra_libs = ["/usr/lib/hadoop/lib/native"]
    if is_state('cuda.installed'):
        extra_libs.append("/usr/local/cuda/lib64")

    spark = Spark()
    spark.configure(hosts, zk_units=zks, peers=peers, extra_libs=extra_libs)
    set_deployment_mode_state('spark.standalone.installed')

def return_secrets(secrets=None):
    """Return secrets dict."""
    if secrets:
        secrets_mod = secrets
    else:
        secrets_mod = {}

    for k in ('redis_host', 'redis_port', 'postgresql_host',
              'postgresql_port', 'postgresql_user', 'postgresql_password',
              'postgresql_dbname', 'github_app_id', 'github_api_secret',
              'github_extended_permissions',
              'social_auth_redirect_is_https', 'email_server_host',
              'email_server_port', 'email_server_username',
              'email_server_password', 'email_server_tls', 'email_from',
              'enable_statsd', 'beacon'):
        secrets_mod[k] = kv.get(k)

    secrets_mod['system_secret_key'] = leader_get('system_secret_key')

    return secrets_mod

def provide_munge_key_to_interface(munge_provider):
    '''Provide the munge key if any consumers are related.'''
    munge_key = leadership.leader_get('munge_key')
    hookenv.log('provide_munge_key_to_interface(): exposing munge key: %s'
                % munge_key)
    munge_provider.expose_munge_key(munge_key)
    munge_provider.provide_munge_key()
    flags.set_flag('munge.exposed')

def configure_munge_key():
    munge_key = leadership.leader_get('munge_key')
    munge.render_munge_key(context={'munge_key': munge_key})
    hookenv.log(
        'configure_munge_key(): leadership detected new munge key, '
        'rendered new file')
    # set a flag confirming that munge key is rendered
    flags.set_flag('munge.configured')

def _maybe_generate_passwords():
    """
    If the leader hasn't generated passwords yet, generate them.
    """
    if not leader_get('passwords'):
        admin_pass = subprocess.check_output(
            ['pwgen', '-N1']).strip().decode('utf-8')
        repl_pass = subprocess.check_output(
            ['pwgen', '-N1']).strip().decode('utf-8')
        leader_set(passwords=json.dumps({'admin_pass': admin_pass,
                                         'repl_pass': repl_pass}))

def update_recovery_conf(follow):
    assert follow != hookenv.local_unit()

    peer_rel = helpers.get_peer_relation()
    follow_relinfo = peer_rel.get(follow)
    assert follow_relinfo is not None, "Invalid upstream {}".format(follow)

    current_follow = get_following()

    if follow != current_follow:
        status_set("maintenance", "Following new unit {}".format(follow))
        set_following(follow)
        # Setting the state to defer publication until after restart.
        reactive.set_state("postgresql.replication.publish_following")
    else:
        # Even though the master is unchanged, we still regenerate
        # recovery.conf in case connection details such as IP addresses
        # have changed.
        hookenv.log("Continuing to follow {}".format(follow))

    pg12 = postgresql.has_version("12")
    if pg12:
        path = postgresql.hot_standby_conf_path()
        template = "hot_standby.conf.tmpl"
    else:
        path = postgresql.recovery_conf_path()
        template = "recovery.conf.tmpl"

    config = hookenv.config()

    data = dict(
        streaming_replication=config["streaming_replication"],
        host=follow_relinfo["host"],
        port=follow_relinfo["port"],
        user=replication_username(),
        password=leader_get("replication_password"),
    )

    if reactive.helpers.is_state("postgresql.wal_e.enabled"):
        data["restore_command"] = wal_e.wal_e_restore_command()

    templating.render(template, path, data,
                      owner="postgres", group="postgres", perms=0o600)

    if pg12:
        touch(postgresql.hot_standby_signal_path())

    # Use @when_file_changed for this when Issue #44 is resolved.
    if reactive.helpers.any_file_changed([path]):
        reactive.set_state("postgresql.cluster.needs_restart")
        if reactive.is_state("postgresql.replication.cloned"):
            reactive.set_state("postgresql.replication.check_following")

def inform_restart(self):
    '''
    Inform our peers that we have restarted, usually as part of a rolling
    restart.
    '''
    for conv in self.conversations():
        nonce = leader_get('restart_nonce')
        conv.set_remote('restarted.{}'.format(nonce), json.dumps(True))

def set_snapd_timer():
    ''' Set the snapd refresh.timer on non-leader cluster members. '''
    # NB: This method should only be run when 'snap.refresh.set' is present.
    # Layer-snap will always set a core refresh.timer, which may not be the
    # same as our leader. Gating with 'snap.refresh.set' ensures layer-snap
    # has finished and we are free to set our config to the leader's timer.
    timer = leadership.leader_get('snapd_refresh') or ''  # None will error
    hookenv.log('setting snapd_refresh timer to: {}'.format(timer))
    snap.set_refresh_timer(timer)

def create_replication_user():
    username = replication_username()
    hookenv.log("Creating replication user {}".format(username))
    con = postgresql.connect()
    postgresql.ensure_user(con, username,
                           leader_get("replication_password"),
                           replication=True)
    con.commit()
    reactive.set_state("postgresql.replication.replication_user_created")

def restarted_nodes(self):
    nodes = []
    nonce = leader_get('restart_nonce')
    if not nonce:
        return nodes  # We're not restarting if no nonce is set.
    for conv in self.conversations():
        if conv.get_remote('restarted.{}'.format(nonce)):
            nodes.append((conv.scope, conv.get_remote('private-address')))
    return nodes

def states_to_check(self, required_relations=None):
    """Custom state check function for charm specific state check needs.

    The interface used for the ``neutron_openvswitch`` subordinate lacks an
    ``available`` state.

    The ``Octavia`` service will not operate normally until Nova and Neutron
    resources have been created; this needs to be tracked in the workload
    status.
    """
    states_to_check = super().states_to_check(required_relations)
    override_relation = 'neutron-openvswitch'
    if override_relation in states_to_check:
        states_to_check[override_relation] = [
            ("{}.connected".format(override_relation),
             "blocked",
             "'{}' missing".format(override_relation))
        ]
    if not leadership.leader_get('amp-boot-network-list'):
        if not reactive.is_flag_set('config.default.create-mgmt-network'):
            # We are configured to not create the required resources and
            # they are not present; prompt the end-user to create them.
            states_to_check['crud'] = [(
                'crud.available',  # imaginary ``crud`` relation
                'blocked',
                'Awaiting end-user to create required resources and '
                'execute `configure-resources` action')]
        else:
            if reactive.is_flag_set('leadership.is_leader'):
                who = 'end-user execution of `configure-resources` action'
            else:
                who = 'leader'
            states_to_check['crud'] = [(
                'crud.available',  # imaginary ``crud`` relation
                'blocked',
                'Awaiting {} to create required resources'.format(who))]
    # If these configuration options are at their default values it means
    # they were not set by the end-user; they are required for successful
    # creation of load balancer instances.
    if (reactive.is_flag_set('config.default.lb-mgmt-issuing-cacert') or
            reactive.is_flag_set(
                'config.default.lb-mgmt-issuing-ca-private-key') or
            reactive.is_flag_set(
                'config.default.lb-mgmt-issuing-ca-key-passphrase') or
            reactive.is_flag_set(
                'config.default.lb-mgmt-controller-cacert') or
            reactive.is_flag_set(
                'config.default.lb-mgmt-controller-cert')):
        # Set workload status to prompt end-user attention.
        states_to_check['config'] = [(
            'config._required_certs',  # imaginary flag
            'blocked',
            'Missing required certificate configuration, please '
            'examine documentation')]
    return states_to_check

def write_cert_to_leadership_data():
    cert_ep = endpoint_from_flag('certificates.certs.available')
    my_cert = cert_ep.server_certs_map[_get_cert_common_name()]

    leader_set({'cert': my_cert.cert, 'key': my_cert.key})

    # We also use this time to generate the cluster id.
    if not leader_get('cluster_id'):
        cluster_id = ''.join(
            random.choice(string.ascii_letters + string.digits)
            for i in range(24))
        leader_set({'cluster_id': cluster_id})

def create_global_client_cert():
    """
    This is for backwards compatibility with older tls-certificate clients
    only. Obviously, it's not good security / design to have clients sharing
    a certificate, but it seems that there are clients that depend on this
    (though some, like etcd, only block on the flag that it triggers but
    don't actually use the cert), so we have to set it for now.
    """
    client_cert = leader_get("client_certificate")
    client_key = leader_get("client_key")
    if not client_cert or not client_key:
        hookenv.log("Unable to find global client cert on "
                    "leadership data, generating...")
        client_cert, client_key = create_client_certificate()
        # Set the client certificate and key on leadership data.
        leader_set({"client_certificate": client_cert})
        leader_set({"client_key": client_key})
    else:
        hookenv.log("found global client cert on leadership "
                    "data, not generating...")
    set_flag("easyrsa.global-client-cert.created")

def upgrade_charm():
    remove_state('calico.binaries.installed')
    remove_state('calico.service.installed')
    remove_state('calico.pool.configured')
    remove_state('calico.image.pulled')
    remove_state('calico.npc.deployed')
    if is_leader() and not leader_get('calico-v3-data-ready'):
        leader_set({
            'calico-v3-data-migration-needed': True,
            'calico-v3-npc-cleanup-needed': True,
            'calico-v3-completion-needed': True
        })

def mirror_credentials(rel):
    pub = rel.to_publish_raw
    config = cassandra.config()
    if config['authenticator'].lower() == 'allowallauthenticator':
        if 'username' in pub:
            del pub['username']
            del pub['password']
        return
    source_unit = leadership.leader_get('client_rel_source')
    source_data = hookenv.relation_get(unit=source_unit,
                                       rid=rel.relation_id)
    if source_data and 'username' in source_data:
        hookenv.log("Mirroring credentials for {} ({}) from {}".format(
            rel.application_name, rel.relation_id, source_unit))
        pub['username'] = source_data['username']
        pub['password'] = source_data['password']

def update_restart_queue(zkpeer):
    '''
    If a Zookeeper node has restarted as part of a rolling restart, pop it
    off of the queue.
    '''
    queue = json.loads(leader_get('restart_queue') or '[]')
    if not queue:
        return
    restarted_nodes = _ip_list(zkpeer.restarted_nodes())
    new_queue = [node for node in queue if node not in restarted_nodes]

    if new_queue != queue:
        hookenv.log('Leader updating restart queue: {}'.format(new_queue))
        leader_set(restart_queue=json.dumps(new_queue))

def db_relation_joined():
    """
    Hook to run when somebody connects to us.
    """
    passwords = json.loads(leader_get('passwords'))  # TODO: Exception handling.
    relation_set(
        host=config('couchdb-host'),
        ip=unit_public_ip(),
        port=config('couchdb-port'),
        admin_pass=passwords['admin_pass'],
        repl_pass=passwords['repl_pass']
    )
    log("{} joined".format(os.getenv('ENSEMBLE_REMOTE_UNIT')), INFO)

def install_spark(hadoop=None):
    spark_master_host = leadership.leader_get('master-fqdn')
    hosts = {
        'spark-master': spark_master_host,
    }

    if is_state('hadoop.yarn.ready'):
        rms = hadoop.resourcemanagers()
        hosts['resourcemanager'] = rms[0]

    if is_state('hadoop.hdfs.ready'):
        nns = hadoop.namenodes()
        hosts['namenode'] = nns[0]

    dist = get_dist_config()
    spark = Spark(dist)
    spark.configure(hosts)

def start():
    currentenv = dict(os.environ)
    port = hookenv.config('carte_port')
    javaopts = hookenv.config('java_opts')
    if javaopts:
        currentenv['JAVA_OPTS'] = javaopts
    try:
        check_call(['pgrep', '-f', 'org.pentaho.di.www.Carte'])
    except CalledProcessError:
        check_call(['su', 'etl', '-c',
                    '/opt/data-integration/carte.sh '
                    '/home/etl/carte-config.xml &'],
                   env=currentenv, cwd="/opt/data-integration")
    hookenv.open_port(port)
    status_set('active',
               'Carte is ready! Master is: ' + leader_get('public_ip'))

def etcd_data_change(etcd):
    ''' Etcd scale events block master reconfiguration due to the
    kubernetes-master.components.started state. We need a way to handle
    these events consistently only when the number of etcd units has
    actually changed. '''

    # Key off of the connection string.
    connection_string = etcd.get_connection_string()

    # If the connection string changes, remove the started state to trigger
    # handling of the master components.
    if data_changed('etcd-connect', connection_string):
        remove_state('kubernetes-master.components.started')

    # We are the leader and the etcd_version is not set, meaning
    # this is the first time we connect to etcd.
    if is_state('leadership.is_leader') and not leader_get('etcd_version'):
        if etcd.get_version().startswith('3.'):
            leader_set(etcd_version='etcd3')
        else:
            leader_set(etcd_version='etcd2')

def install_spark(hadoop=None, zks=None):
    spark_master_host = leadership.leader_get('master-fqdn')
    if not spark_master_host:
        hookenv.status_set('waiting', 'master not elected yet')
        return False

    hosts = {
        'spark-master': spark_master_host,
    }

    if is_state('hadoop.yarn.ready'):
        rms = hadoop.resourcemanagers()
        hosts['resourcemanager'] = rms[0]

    if is_state('hadoop.hdfs.ready'):
        nns = hadoop.namenodes()
        hosts['namenode'] = nns[0]

    spark = Spark()
    spark.configure(hosts, zks, get_spark_peers())
    return True

def reinstall_spark():
    spark_master_host = leadership.leader_get('master-fqdn')
    deployment_matrix = {
        'spark_master': spark_master_host,
        'yarn_ready': is_state('hadoop.yarn.ready'),
        'hdfs_ready': is_state('hadoop.hdfs.ready'),
    }

    if not data_changed('deployment_matrix', deployment_matrix):
        return

    hookenv.status_set('maintenance', 'Configuring Spark')
    hadoop = (RelationBase.from_state('hadoop.yarn.ready') or
              RelationBase.from_state('hadoop.hdfs.ready'))
    install_spark(hadoop)
    if is_state('hadoop.yarn.ready'):
        set_deployment_mode_state('spark.yarn.installed')
    else:
        set_deployment_mode_state('spark.standalone.installed')

    report_status()

def start_master(etcd):
    '''Run the Kubernetes master components.'''
    hookenv.status_set('maintenance',
                       'Configuring the Kubernetes master services.')
    freeze_service_cidr()
    if not etcd.get_connection_string():
        # etcd is not returning a connection string. This happens when
        # the master unit disconnects from etcd and is ready to terminate.
        # No point in trying to start master services and fail. Just return.
        return

    # TODO: Make sure below relation is handled on change
    # https://github.com/kubernetes/kubernetes/issues/43461
    handle_etcd_relation(etcd)

    # Add CLI options to all components.
    leader_etcd_version = leader_get('etcd_version')
    configure_apiserver(etcd.get_connection_string(), leader_etcd_version)
    configure_controller_manager()
    configure_scheduler()

    set_state('kubernetes-master.components.started')
    hookenv.open_port(6443)

def restart_for_quorum(zkpeer):
    '''
    If we're the next node in the restart queue, restart, and then inform
    the leader that we've restarted. (If we are the leader, remove ourselves
    from the queue, and update the leadership data.)
    '''
    private_address = hookenv.unit_get('private-address')
    queue = json.loads(leader_get('restart_queue') or '[]')
    if not queue:
        # Everything has restarted.
        return

    if private_address == queue[0]:
        # It's our turn to restart.
        _restart_zookeeper('rolling restart for quorum update')
        if is_state('leadership.is_leader'):
            queue = queue[1:]
            hookenv.log('Leader updating restart queue: {}'.format(queue))
            leader_set(restart_queue=json.dumps(queue))
        else:
            zkpeer.inform_restart()

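# Hedged sketch of how a leader could seed the restart queue and nonce that
# restart_for_quorum(), update_restart_queue(), inform_restart() and
# restarted_nodes() above all consume. The function name
# `start_rolling_restart` and the timestamp-based nonce are assumptions for
# illustration, not the charm's actual implementation; leader_set() and the
# JSON encoding of the queue are the only pieces taken from the code above.
def start_rolling_restart(peer_addresses):
    '''Queue every peer's private address for a rolling restart (sketch).'''
    import json
    import time

    # A fresh nonce distinguishes this restart round from earlier ones, so
    # stale 'restarted.<nonce>' acknowledgements are ignored.
    leader_set(restart_nonce=str(time.time()))
    # The queue is an ordered JSON list of private addresses; each unit
    # restarts when it finds itself at the head of the list.
    leader_set(restart_queue=json.dumps(sorted(peer_addresses)))
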
def check_for_upgrade_needed():
    '''An upgrade charm event was triggered by Juju, react to that here.'''
    hookenv.status_set('maintenance', 'Checking resources')

    migrate_from_pre_snaps()
    add_rbac_roles()
    set_state('reconfigure.authentication.setup')
    remove_state('authentication.setup')

    changed = snap_resources_changed()
    if changed == 'yes':
        set_upgrade_needed()
    elif changed == 'unknown':
        # We are here on an upgrade from non-rolling master.
        # Since this upgrade might also include resource updates, e.g.
        #   juju upgrade-charm kubernetes-master --resource kube-any=my.snap
        # we take no risk and forcibly upgrade the snaps.
        # Forcibly means we do not prompt the user to call the upgrade
        # action.
        set_upgrade_needed(forced=True)

    # Set the auto storage backend to etcd2.
    auto_storage_backend = leader_get('auto_storage_backend')
    is_leader = is_state('leadership.is_leader')
    if not auto_storage_backend and is_leader:
        leader_set(auto_storage_backend='etcd2')

def upgrade_for_etcd():
    # We are upgrading the charm. If this is an old deployment,
    # etcd_version is not set, so if we are the leader we need to set it
    # to v2.
    if not leader_get('etcd_version') and is_state('leadership.is_leader'):
        leader_set(etcd_version='etcd2')

def get_cluster_nodes():
    return json.loads(leadership.leader_get('cluster-nodes') or '[]')

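# Hedged counterpart to get_cluster_nodes(): the helper name
# `set_cluster_nodes` and the sorting are assumptions for illustration, but
# the sketch mirrors the JSON round-trip the getter expects (a JSON-encoded
# list stored under the 'cluster-nodes' leadership key).
def set_cluster_nodes(nodes):
    '''Persist the cluster node list in leader storage (sketch).'''
    leadership.leader_set({'cluster-nodes': json.dumps(sorted(nodes))})
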
def on_config_api_extra_args_change(etcd):
    configure_apiserver(etcd.get_connection_string(),
                        leader_get('etcd_version'))

def getStorageBackend():
    storage_backend = hookenv.config('storage-backend')
    if storage_backend == 'auto':
        storage_backend = leader_get('auto_storage_backend')
    return storage_backend

def send_ssh_key(datanode):
    datanode.send_ssh_key(leadership.leader_get('ssh-key-pub'))

def render_master_config():
    render('carte-config/master.xml.j2', '/home/etl/carte-config.xml', {
        'carteport': leader_get('port'),
        'cartehostname': hookenv.unit_private_ip()
    })

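# Hedged sketch of the leader-side publication that render_master_config(),
# render_slave_config() and start() above depend on. The function name
# `publish_carte_settings` is an assumption, as is using
# charmhelpers.core.host.pwgen() as the source of the shared password; only
# leader_set(), hookenv.config(), hookenv.unit_private_ip() and
# hookenv.unit_get() are existing APIs here.
def publish_carte_settings():
    '''Share Carte connection details with all units via leader data (sketch).'''
    leader_set({
        'port': hookenv.config('carte_port'),
        'hostname': hookenv.unit_private_ip(),
        # Hypothetical: any shared secret source would do here.
        'password': host.pwgen(20),
        'public_ip': hookenv.unit_get('public-address'),
    })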