def get_nagios_unit_name(relation_name='nrpe-external-master'):
    """
    Return the nagios unit name prepended with host_context if needed

    :param str relation_name: Name of relation nrpe sub joined to
    """
    host_context = get_nagios_hostcontext(relation_name)
    if host_context:
        unit = "%s:%s" % (host_context, local_unit())
    else:
        unit = local_unit()
    return unit

def test_maybe_restart(self, status_set, stop_cassandra, start_cassandra,
                       remount, ensure_directories, is_bootstrapped,
                       is_leader):
    coordinator.grants = {}
    coordinator.requests = {hookenv.local_unit(): {}}
    coordinator.relid = 'cluster:1'
    coordinator.grant('restart', hookenv.local_unit())
    actions.maybe_restart('')
    stop_cassandra.assert_called_once_with()
    remount.assert_called_once_with()
    ensure_directories.assert_called_once_with()
    start_cassandra.assert_called_once_with()

def cluster_with():
    if is_unit_paused_set():
        log("Do not run cluster_with while unit is paused", "WARNING")
        return

    log('Clustering with new node')

    # check the leader and try to cluster with it
    node = leader_node()
    if node:
        if node in running_nodes():
            log('Host already clustered with %s.' % node)

            cluster_rid = relation_id('cluster', local_unit())
            is_clustered = relation_get(attribute='clustered',
                                        rid=cluster_rid,
                                        unit=local_unit())

            log('am I clustered?: %s' % bool(is_clustered), level=DEBUG)
            if not is_clustered:
                # NOTE(freyes): this node needs to be marked as clustered,
                # it's part of the cluster according to
                # 'rabbitmqctl cluster_status' (LP: #1691510)
                relation_set(relation_id=cluster_rid,
                             clustered=get_unit_hostname(),
                             timestamp=time.time())

            return False
        # NOTE: The primary problem rabbitmq has with clustering is when
        # more than one node attempts to cluster at the same time.
        # The asynchronous nature of hook firing nearly guarantees
        # this. Using cluster_wait based on modulo_distribution
        cluster_wait()
        try:
            join_cluster(node)
            # NOTE: toggle the cluster relation to ensure that any peers
            # already clustered re-assess status correctly
            relation_set(clustered=get_unit_hostname(),
                         timestamp=time.time())
            return True
        except subprocess.CalledProcessError as e:
            status_set('blocked', 'Failed to cluster with %s. Exception: %s'
                       % (node, e))
            start_app()
    else:
        status_set('waiting', 'Leader not available for clustering')
        return False

    return False

def configure_namenode(self, secondary_host=None, secondary_port=None):
    dc = self.hadoop_base.dist_config
    host = hookenv.local_unit().replace('/', '-')
    port = dc.port('namenode')
    self.configure_hdfs_base(host, port)
    cfg = self.hadoop_base.charm_config
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.replication'] = cfg['dfs_replication']
        props['dfs.blocksize'] = int(cfg['dfs_blocksize'])
        props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'true'
        props['dfs.namenode.http-address'] = '0.0.0.0:{}'.format(
            dc.port('nn_webapp_http'))
        # TODO: support SSL
        # props['dfs.namenode.https-address'] = '0.0.0.0:{}'.format(
        #     dc.port('nn_webapp_https'))

        # FIXME hack-around until transition to layers is complete
        if not (secondary_host and secondary_port) and helpers:
            unit, secondary = helpers.any_ready_unit('secondary')
            if unit:
                secondary_host = secondary['hostname']
                secondary_port = secondary['port']
        if secondary_host and secondary_port:
            props['dfs.secondary.http.address'] = '{host}:{port}'.format(
                host=secondary_host,
                port=secondary_port,
            )

def get_broker_rsp_key():
    """Return broker response key for this unit

    This is the key that ceph is going to use to pass request status
    information back to this unit
    """
    return 'broker-rsp-' + local_unit().replace('/', '-')

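# Illustrative sketch (not part of the charm code): the unit name used below
# is hypothetical; it just shows the key shape produced by the function
# above, where '/' in the Juju unit name is replaced with '-'.
def _example_broker_rsp_key():
    unit_name = 'glance/0'  # assumed local_unit() value
    key = 'broker-rsp-' + unit_name.replace('/', '-')
    assert key == 'broker-rsp-glance-0'
    return key
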
def process_certificates(service_name, relation_id, unit,
                         custom_hostname_link=None, user='******',
                         group='root'):
    """Process the certificates supplied down the relation

    :param service_name: str Name of service the certificates are for.
    :param relation_id: str Relation id providing the certs
    :param unit: str Unit providing the certs
    :param custom_hostname_link: str Name of custom link to create
    :param user: (Optional) Owner of certificate files. Defaults to 'root'
    :type user: str
    :param group: (Optional) Group of certificate files. Defaults to 'root'
    :type group: str
    """
    data = relation_get(rid=relation_id, unit=unit)
    ssl_dir = os.path.join('/etc/apache2/ssl/', service_name)
    mkdir(path=ssl_dir)
    name = local_unit().replace('/', '_')
    certs = data.get('{}.processed_requests'.format(name))
    chain = data.get('chain')
    ca = data.get('ca')
    if certs:
        certs = json.loads(certs)
        install_ca_cert(ca.encode())
        install_certs(ssl_dir, certs, chain, user=user, group=group)
        create_ip_cert_links(
            ssl_dir,
            custom_hostname_link=custom_hostname_link)

def server_key(source, destination, user=None, group=None):
    """
    Copy the server key to the destination, creating directories if needed
    and assign ownership if set.

    :param string source: The directory to look for the key, if None the
    key will be copied from the default location.
    :param string destination: The path to save the key.
    :param string user: The optional name of the user to own the key.
    :param string group: The optional name of the group to own the key.
    """
    _ensure_directory(destination, user, group)

    if not source:
        # Must remove the path characters from the local unit name.
        key_name = local_unit().replace('/', '_')
        # The location of the server key is easy-rsa/easyrsa3/pki/private
        source = \
            os.path.join(
                charm_dir(),
                'easy-rsa/easyrsa3/pki/private/{0}.key'.format(key_name))

    # Copy the key to the destination.
    copy2(source, destination)
    chown(destination, user, group)

    # Store the server key path on the unitdata.
    unitdata.kv().set('server-key-path', destination)

def provide(self, remote_service, all_ready):
    data = super(NodeManager, self).provide(remote_service, all_ready)
    hostname = hookenv.local_unit().replace('/', '-')
    data.update({
        'hostname': hostname,
    })
    return data

def db_changed():
    rel = os_release('glance-common')

    if 'shared-db' not in CONFIGS.complete_contexts():
        juju_log('shared-db relation incomplete. Peer not ready?')
        return

    CONFIGS.write(GLANCE_REGISTRY_CONF)
    # since folsom, a db connection setting in glance-api.conf is required.
    if rel != "essex":
        CONFIGS.write(GLANCE_API_CONF)

    if is_elected_leader(CLUSTER_RES):
        # Bugs 1353135 & 1187508. Dbs can appear to be ready before the units
        # acl entry has been added. So, if the db supports passing a list of
        # permitted units then check if we're in the list.
        allowed_units = relation_get('allowed_units')
        if allowed_units and local_unit() in allowed_units.split():
            if rel == "essex":
                status = call(['glance-manage', 'db_version'])
                if status != 0:
                    juju_log('Setting version_control to 0')
                    cmd = ["glance-manage", "version_control", "0"]
                    check_call(cmd)

            juju_log('Cluster leader, performing db sync')
            migrate_database()
        else:
            juju_log('allowed_units either not presented, or local unit '
                     'not in acl list: %s' % allowed_units)

def db_changed():
    if 'shared-db' not in CONFIGS.complete_contexts():
        log('shared-db relation incomplete. Peer not ready?')
        return
    CONFIGS.write_all()

    if is_elected_leader(CLUSTER_RES):
        # Bugs 1353135 & 1187508. Dbs can appear to be ready before the units
        # acl entry has been added. So, if the db supports passing a list of
        # permitted units then check if we're in the list.
        allowed_units = relation_get('nova_allowed_units')
        if allowed_units and local_unit() in allowed_units.split():
            status_set('maintenance', 'Running nova db migration')
            migrate_nova_database()
            log('Triggering remote cloud-compute restarts.')
            [compute_joined(rid=rid, remote_restart=True)
             for rid in relation_ids('cloud-compute')]
            log('Triggering remote cell restarts.')
            [nova_cell_relation_joined(rid=rid, remote_restart=True)
             for rid in relation_ids('cell')]
            conditional_neutron_migration()
        else:
            log('allowed_units either not presented, or local unit '
                'not in acl list: %s' % repr(allowed_units))

    for r_id in relation_ids('nova-api'):
        nova_api_relation_joined(rid=r_id)

def _override(self):
    '''
    Return a dict of keys and values that will override puppet's defaults.
    '''
    override = {
        "hadoop_zookeeper::server::myid": local_unit().split("/")[1],
        "hadoop_zookeeper::server::ensemble": self.read_peers()
    }
    conf = config()
    network_interface = conf.get('network_interface')
    autopurge_purge_interval = conf.get('autopurge_purge_interval')
    autopurge_snap_retain_count = conf.get('autopurge_snap_retain_count')
    if network_interface:
        key = "hadoop_zookeeper::server::client_bind_addr"
        override[key] = Bigtop().get_ip_for_interface(network_interface)
    if autopurge_purge_interval:
        key = "hadoop_zookeeper::server::autopurge_purge_interval"
        override[key] = autopurge_purge_interval
    if autopurge_snap_retain_count:
        key = "hadoop_zookeeper::server::autopurge_snap_retain_count"
        override[key] = autopurge_snap_retain_count
    return override

def check_local_db_actions_complete():
    """Check if we have received a db init'd notification and restart
    services if we have not already done so.

    NOTE: this must only be called from peer relation context.
    """
    if not is_db_initialised():
        return

    settings = relation_get() or {}
    if settings:
        init_id = settings.get(NEUTRON_DB_INIT_RKEY)
        echoed_init_id = relation_get(unit=local_unit(),
                                      attribute=NEUTRON_DB_INIT_ECHO_RKEY)

        # If we have received an init notification from a peer unit
        # (assumed to be the leader) then restart neutron-api and echo the
        # notification and don't restart again unless we receive a new
        # (different) notification.
        if is_new_dbinit_notification(init_id, echoed_init_id):
            if not is_unit_paused_set():
                log("Restarting neutron services following db "
                    "initialisation", level=DEBUG)
                service_restart('neutron-server')

            # Echo notification
            relation_set(**{NEUTRON_DB_INIT_ECHO_RKEY: init_id})

def update_peers(self, node_list):
    '''
    This method will return True if the master peer was updated.
    False otherwise.
    '''
    old_master = unitdata.kv().get('spark_master.ip', 'not_set')
    master_ip = ''
    if not node_list:
        hookenv.log("No peers yet. Acting as master.")
        master_ip = utils.resolve_private_address(hookenv.unit_private_ip())
        nodes = [(hookenv.local_unit(), master_ip)]
        unitdata.kv().set('spark_all_master.ips', nodes)
        unitdata.kv().set('spark_master.ip', master_ip)
    else:
        # Use as master the node with the minimum id.
        # Any ordering is fine here. Lexicographical ordering too.
        node_list.sort()
        master_ip = utils.resolve_private_address(node_list[0][1])
        unitdata.kv().set('spark_master.ip', master_ip)
        unitdata.kv().set('spark_all_master.ips', node_list)
        hookenv.log("Updating master ip to {}.".format(master_ip))

    unitdata.kv().set('spark_master.is_set', True)
    unitdata.kv().flush(True)
    # In case of an HA setup, adding peers must be treated as a potential
    # master change.
    if (old_master != master_ip) or unitdata.kv().get('zookeepers.available',
                                                      False):
        return True
    else:
        return False

def is_db_initialised(cluster_rid=None):
    """
    Check whether a db initialisation has been performed by any peer unit.

    We base our decision on whether we or any of our peers has previously
    sent or echoed an initialisation notification.

    @param cluster_rid: current relation id. If none provided, all cluster
                        relation ids will be checked.
    @return: True if there has been a db initialisation otherwise False.
    """
    if cluster_rid:
        rids = [cluster_rid]
    else:
        rids = relation_ids('cluster')

    shared_db_rel_id = (relation_ids('shared-db') or [None])[0]
    if not shared_db_rel_id:
        return False

    for c_rid in rids:
        units = related_units(relid=c_rid) + [local_unit()]
        for unit in units:
            settings = relation_get(unit=unit, rid=c_rid) or {}
            for key in [NEUTRON_DB_INIT_RKEY, NEUTRON_DB_INIT_ECHO_RKEY]:
                if shared_db_rel_id in settings.get(key, ''):
                    return True

    return False

def _emit_state(self):
    # Emit this unit's lock status.
    for lock in sorted(self.requests[hookenv.local_unit()].keys()):
        if self.granted(lock):
            self.msg('Granted {}'.format(lock))
        else:
            self.msg('Waiting on {}'.format(lock))

def _load_state(self):
    self.msg('Loading state'.format(self._name()))

    # All responses must be stored in the leadership settings.
    # The leader cannot use local state, as a different unit may
    # be leader next time. Which is fine, as the leadership
    # settings are always available.
    self.grants = json.loads(hookenv.leader_get(self.key) or '{}')

    local_unit = hookenv.local_unit()

    # All requests must be stored on the peer relation. This is
    # the only channel units have to communicate with the leader.
    # Even the leader needs to store its requests here, as a
    # different unit may be leader by the time the request can be
    # granted.
    if self.relid is None:
        # The peer relation is not available. Maybe we are early in
        # the unit's lifecycle. Maybe this unit is standalone.
        # Fallback to using local state.
        self.msg('No peer relation. Loading local state')
        self.requests = {local_unit: self._load_local_state()}
    else:
        self.requests = self._load_peer_state()
        if local_unit not in self.requests:
            # The peer relation has just been joined. Update any state
            # loaded from our peers with our local state.
            self.msg('New peer relation. Merging local state')
            self.requests[local_unit] = self._load_local_state()

def leader_init_db_if_ready(skip_acl_check=False, db_rid=None, unit=None):
    """Initialise db if leader and db not yet initialised.

    NOTE: must be called from database context.
    """
    if not hookenv.is_leader():
        hookenv.log("Not leader - skipping db init", level=hookenv.DEBUG)
        return

    if ncc_utils.is_db_initialised():
        hookenv.log("Database already initialised - skipping db init",
                    level=hookenv.DEBUG)
        return

    # Bugs 1353135 & 1187508. Dbs can appear to be ready before the units
    # acl entry has been added. So, if the db supports passing a list of
    # permitted units then check if we're in the list.
    allowed_units = hookenv.relation_get('nova_allowed_units',
                                         rid=db_rid, unit=unit)
    if skip_acl_check or (allowed_units and hookenv.local_unit() in
                          allowed_units.split()):
        hookenv.status_set('maintenance', 'Running nova db migration')
        ncc_utils.migrate_nova_databases()
        hookenv.log('Triggering remote restarts.')
        update_nova_relation(remote_restart=True)
    else:
        hookenv.log('allowed_units either not presented, or local unit '
                    'not in acl list: %s' % repr(allowed_units))

def send_data(tls):
    '''Send the data that is required to create a server certificate for
    this server.'''
    # Use the public ip of this unit as the Common Name for the certificate.
    common_name = hookenv.unit_public_ip()

    # Get the SDN gateway based on the cidr address.
    kubernetes_service_ip = get_kubernetes_service_ip()

    domain = hookenv.config('dns_domain')
    # Create SANs that the tls layer will add to the server cert.
    sans = [
        hookenv.unit_public_ip(),
        hookenv.unit_private_ip(),
        socket.gethostname(),
        kubernetes_service_ip,
        'kubernetes',
        'kubernetes.{0}'.format(domain),
        'kubernetes.default',
        'kubernetes.default.svc',
        'kubernetes.default.svc.{0}'.format(domain)
    ]

    # maybe they have extra names they want as SANs
    extra_sans = hookenv.config('extra_sans')
    if extra_sans and not extra_sans == "":
        sans.extend(extra_sans.split())

    # Create a path safe name by removing path characters from the unit name.
    certificate_name = hookenv.local_unit().replace('/', '_')

    # Request a server cert with this information.
    tls.request_server_cert(common_name, sans, certificate_name)

def __init__(self, hostname=None, primary=True):
    super(NRPE, self).__init__()
    self.config = config()
    self.primary = primary
    self.nagios_context = self.config['nagios_context']
    if 'nagios_servicegroups' in self.config and self.config['nagios_servicegroups']:
        self.nagios_servicegroups = self.config['nagios_servicegroups']
    else:
        self.nagios_servicegroups = self.nagios_context
    self.unit_name = local_unit().replace('/', '-')
    if hostname:
        self.hostname = hostname
    else:
        nagios_hostname = get_nagios_hostname()
        if nagios_hostname:
            self.hostname = nagios_hostname
        else:
            self.hostname = "{}-{}".format(self.nagios_context,
                                           self.unit_name)
    self.checks = []
    # Iff in an nrpe-external-master relation hook, set primary status
    relation = relation_ids('nrpe-external-master')
    if relation:
        log("Setting charm primary status {}".format(primary))
        for rid in relation_ids('nrpe-external-master'):
            relation_set(relation_id=rid,
                         relation_settings={'primary': self.primary})

def __call__(self):
    '''
    Builds half a context for the haproxy template, which describes
    all peers to be included in the cluster. Each charm needs to include
    its own context generator that describes the port mapping.
    '''
    if not relation_ids('cluster'):
        return {}

    cluster_hosts = {}
    l_unit = local_unit().replace('/', '-')
    cluster_hosts[l_unit] = unit_get('private-address')

    for rid in relation_ids('cluster'):
        for unit in related_units(rid):
            _unit = unit.replace('/', '-')
            addr = relation_get('private-address', rid=rid, unit=unit)
            cluster_hosts[_unit] = addr

    ctxt = {
        'units': cluster_hosts,
    }
    if len(cluster_hosts.keys()) > 1:
        # Enable haproxy when we have enough peers.
        log('Ensuring haproxy enabled in /etc/default/haproxy.')
        with open('/etc/default/haproxy', 'w') as out:
            out.write('ENABLED=1\n')
        return ctxt

    log('HAProxy context is incomplete, this unit has no peers.')
    return {}

def get_ha_nodes():
    ha_units = peer_ips(peer_relation='hanode')
    ha_nodes = {}
    for unit in ha_units:
        corosync_id = get_corosync_id(unit)
        addr = ha_units[unit]
        if config('prefer-ipv6'):
            if not utils.is_ipv6(addr):
                # Not an error since cluster may still be forming/updating
                log("Expected an ipv6 address but got %s" % (addr),
                    level=WARNING)

            ha_nodes[corosync_id] = addr
        else:
            ha_nodes[corosync_id] = get_host_ip(addr)

    corosync_id = get_corosync_id(local_unit())
    if config('prefer-ipv6'):
        addr = get_ipv6_addr()
    else:
        addr = get_host_ip(unit_get('private-address'))

    ha_nodes[corosync_id] = addr

    return ha_nodes

def is_bootstrapped():
    """Determine if each node in the cluster has been bootstrapped and the
    cluster is complete with the expected number of peers.

    Check that each node in the cluster, including this one, has set
    bootstrap-uuid on the cluster relation.

    Having min-cluster-size set will guarantee is_bootstrapped will not
    return True until the expected number of peers are bootstrapped. If
    min-cluster-size is not set, it will check peer relations to estimate
    the expected cluster size. If min-cluster-size is not set and there
    are no peers it must assume the cluster is bootstrapped in order to
    allow for single unit deployments.

    @returns boolean
    """
    min_size = get_min_cluster_size()
    if not is_sufficient_peers():
        return False
    elif min_size > 1:
        uuids = []
        for relation_id in relation_ids('cluster'):
            units = related_units(relation_id) or []
            units.append(local_unit())
            for unit in units:
                if not relation_get(attribute='bootstrap-uuid',
                                    rid=relation_id,
                                    unit=unit):
                    log("{} is not yet clustered".format(unit), DEBUG)
                    return False
                else:
                    bootstrap_uuid = relation_get(attribute='bootstrap-uuid',
                                                  rid=relation_id,
                                                  unit=unit)
                    if bootstrap_uuid:
                        uuids.append(bootstrap_uuid)

        if len(uuids) < min_size:
            log("Fewer than minimum cluster size: "
                "{} percona units reporting clustered".format(min_size),
                DEBUG)
            return False
        elif len(set(uuids)) > 1:
            raise Exception("Found inconsistent bootstrap uuids: "
                            "{}".format((uuids)))
        else:
            log("All {} percona units reporting clustered".format(min_size),
                DEBUG)
    elif not seeded():
        # Single unit deployment but not yet bootstrapped
        return False

    # Set INITIAL_CLUSTERED_KEY as the cluster has fully bootstrapped
    kvstore = kv()
    if not kvstore.get(INITIAL_CLUSTERED_KEY, False):
        kvstore.set(key=INITIAL_CLUSTERED_KEY, value=True)
        kvstore.flush()

    return True

def __call__(self):
    '''
    Horizon specific HAProxy context; haproxy is used all the time
    in the openstack dashboard charm so a single instance just
    self refers.
    '''
    cluster_hosts = {}
    l_unit = local_unit().replace('/', '-')
    cluster_hosts[l_unit] = unit_get('private-address')

    for rid in relation_ids('cluster'):
        for unit in related_units(rid):
            _unit = unit.replace('/', '-')
            addr = relation_get('private-address', rid=rid, unit=unit)
            cluster_hosts[_unit] = addr

    log('Ensuring haproxy enabled in /etc/default/haproxy.')
    with open('/etc/default/haproxy', 'w') as out:
        out.write('ENABLED=1\n')

    ctxt = {
        'units': cluster_hosts,
        'service_ports': {
            'dash_insecure': [80, 70],
            'dash_secure': [443, 433]
        }
    }
    return ctxt

def server_cert(source, destination, user=None, group=None):
    """
    Copy the server certificate to the destination, creating directories
    if needed and assign ownership if set.

    :param string source: The directory to look for the certificate, if None
    the certificate will be copied from unit data.
    :param string destination: The path to save the certificate.
    :param string user: The optional name of the user to own the certificate.
    :param string group: The optional name of the group to own the
    certificate.
    """
    _ensure_directory(destination, user, group)
    if not source:
        # Must remove the path characters from the local unit name.
        key_name = local_unit().replace('/', '_')
        # The location of the server certificate is easy-rsa/easyrsa3/pki/issued
        source = \
            os.path.join(
                charm_dir(),
                'easy-rsa/easyrsa3/pki/issued/{0}.crt'.format(key_name))
    if os.path.isfile(source):
        # Copy the server certificate to the destination.
        copy2(source, destination)
    else:
        # No source server certificate, get the value from unit data.
        server_cert_key = 'tls.server.certificate'
        # Save the certificate data to the destination directory.
        _save_unitdata(server_cert_key, destination)
    chown(destination, user, group)
    # Store the server certificate path on the unitdata.
    unitdata.kv().set('server-cert-path', destination)

def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace("/", "-")
    hdfs_port = hadoop.dist_config.port("namenode")
    webhdfs_port = hadoop.dist_config.port("nn_webapp_http")

    utils.update_kv_hosts({node["ip"]: node["host"]
                           for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key("hdfs"))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node["host"] for node in datanode.nodes()]
    if data_changed("namenode.slaves", slaves):
        unitdata.kv().set("namenode.slaves", slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set(
        "active",
        "Ready ({count} DataNode{s})".format(
            count=len(slaves),
            s="s" if len(slaves) > 1 else ""
        )
    )
    set_state("namenode.ready")

def auth_update():
    # We used to have individual superuser credentials for each node,
    # which was unnecessarily clever.
    username = '******'.format(re.subn(r'\W', '_', hookenv.local_unit())[0])
    username, password = cassandra.get_cqlshrc_credentials(username)
    leadership.leader_set(username=username, password=password)
    hookenv.log('Migrated charm superuser credentials')

def leader_init_db_if_ready(skip_acl_check=False, skip_cells_restarts=False,
                            db_rid=None, unit=None):
    """Initialise db if leader and db not yet initialised.

    NOTE: must be called from database context.
    """
    if not is_elected_leader(CLUSTER_RES):
        log("Not leader - skipping db init", level=DEBUG)
        return

    if is_db_initialised():
        log("Database already initialised - skipping db init", level=DEBUG)
        return

    # Bugs 1353135 & 1187508. Dbs can appear to be ready before the units
    # acl entry has been added. So, if the db supports passing a list of
    # permitted units then check if we're in the list.
    allowed_units = relation_get('nova_allowed_units', rid=db_rid, unit=unit)
    if skip_acl_check or (allowed_units and local_unit() in
                          allowed_units.split()):
        status_set('maintenance', 'Running nova db migration')
        migrate_nova_database()
        log('Triggering remote cloud-compute restarts.')
        [compute_joined(rid=rid, remote_restart=True)
         for rid in relation_ids('cloud-compute')]
        log('Triggering remote neutron-network-service restarts.')
        [quantum_joined(rid=rid, remote_restart=True)
         for rid in relation_ids('quantum-network-service')]
        if not skip_cells_restarts:
            log('Triggering remote cell restarts.')
            [nova_cell_relation_joined(rid=rid, remote_restart=True)
             for rid in relation_ids('cell')]
    else:
        log('allowed_units either not presented, or local unit '
            'not in acl list: %s' % repr(allowed_units))

def granted(self, lock):
    '''Return True if a previously requested lock has been granted'''
    unit = hookenv.local_unit()
    ts = self.requests[unit].get(lock)
    if ts and self.grants.get(unit, {}).get(lock) == ts:
        return True
    return False

def update_zoo_cfg(self, zkid=getid(local_unit()), ip=unit_private_ip(),
                   remove=False):
    """
    Add or remove Zookeeper units from zoo.cfg.

    Configuration for a Zookeeper quorum requires listing all unique servers
    (server.X=<ip>:2888:3888) in the zoo.cfg. This function manages server.X
    entries.
    """
    zookeeper_cfg = "{}/zoo.cfg".format(
        self.dist_config.path('zookeeper_conf'))
    key = "server.{}".format(zkid)
    value = "={}:2888:3888".format(ip)
    found = False
    if remove:
        with open(zookeeper_cfg, 'r', encoding='utf-8') as f:
            contents = f.readlines()
            for l in range(0, len(contents)):
                if contents[l].startswith(key):
                    contents.pop(l)
                    found = True
                    break
        if found:
            with open(zookeeper_cfg, 'w', encoding='utf-8') as f:
                f.writelines(contents)
    else:
        with open(zookeeper_cfg, 'r', encoding='utf-8') as f:
            contents = f.readlines()
            for l in range(0, len(contents)):
                if contents[l].startswith(key):
                    contents[l] = key + value + "\n"
                    found = True
        if not found:
            contents.append(key + value + "\n")
        with open(zookeeper_cfg, 'w', encoding='utf-8') as f:
            f.writelines(contents)

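# Illustrative sketch (assumed id and ip): each Zookeeper peer contributes
# one "server.X=<ip>:2888:3888" line to zoo.cfg. This shows the exact string
# the function above writes or rewrites for a single unit.
def _example_zoo_cfg_entry(zkid=3, ip='10.0.0.7'):
    key = "server.{}".format(zkid)
    value = "={}:2888:3888".format(ip)
    return key + value + "\n"  # -> "server.3=10.0.0.7:2888:3888\n"
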
def test_requires_live_node_down(self, is_running, is_decommissioned,
                                 get_sup):
    is_decommissioned.return_value = False  # Is not decommissioned.
    is_running.return_value = False  # Is not running.
    get_sup.return_value = set([hookenv.local_unit()])  # Creds exist.
    self.assertFalse(bool(definitions.RequiresLiveNode()))

def get_request(self):
    """Generate request from the batched up entries"""
    if self.hostname_entry:
        self.entries.append(self.hostname_entry)
    request = {}
    for entry in self.entries:
        sans = sorted(list(set(entry['addresses'])))
        request[entry['cn']] = {'sans': sans}
    if self.json_encode:
        req = {'cert_requests': json.dumps(request, sort_keys=True)}
    else:
        req = {'cert_requests': request}
    req['unit_name'] = local_unit().replace('/', '_')
    return req

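# Illustrative sketch (hostnames, addresses and the unit name are assumed):
# the certificate request maps each CN to a de-duplicated, sorted SAN list,
# optionally JSON-encoded under 'cert_requests', mirroring the method above.
def _example_cert_request(json_encode=True):
    import json
    entries = [{'cn': 'host-a.example.com',
                'addresses': ['10.0.0.5', '10.0.0.5', 'host-a.example.com']}]
    request = {e['cn']: {'sans': sorted(set(e['addresses']))} for e in entries}
    if json_encode:
        return {'cert_requests': json.dumps(request, sort_keys=True),
                'unit_name': 'keystone_0'}  # hypothetical unit name
    return {'cert_requests': request, 'unit_name': 'keystone_0'}
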
def rebuild_relation(relid, unit, relation):
    relname = relid.split(":")[0]
    unitname = unit.replace("/", "-")
    this_unit = hookenv.local_unit()

    allowed_units = relation.get("allowed-units", "")
    if this_unit not in allowed_units.split():
        log("Not yet authorized on {}".format(relid), INFO)
        return

    script_name = "psql-{}-{}".format(relname, unitname)
    build_script(script_name, relation)
    state = relation.get("state", None)
    if state in ("master", "hot standby"):
        script_name = "psql-{}-{}".format(relname, state.replace(" ", "-"))
        build_script(script_name, relation)

def remove_deferred_restarts_check(nrpe):
    """
    Remove NRPE check for services with deferred service restarts.

    :param NRPE nrpe: NRPE object to remove check from
    """
    unit_name = local_unit().replace('/', '-')
    shortname = unit_name + '_deferred_restarts'
    check_cmd = 'check_deferred_restarts.py --application {}'.format(
        application_name())

    log('Removing deferred restarts nrpe check: {}'.format(shortname))
    nrpe.remove_check(
        shortname=shortname,
        description='Check deferred service restarts {}'.format(unit_name),
        check_cmd=check_cmd)

def setup_nagios(nagios):
    config = hookenv.config()
    unit_name = hookenv.local_unit()
    au.download(
        "https://raw.githubusercontent.com/buggtb/dcos-master-charm/master/monitoring_scripts/check_service.sh",
        "/usr/bin/check_service.sh")
    au.download(
        "https://raw.githubusercontent.com/buggtb/dcos-master-charm/master/monitoring_scripts/dcos_unit_check.sh",
        "/usr/bin/dcos_unit_check.sh")
    nagios.add_check(
        ['/usr/bin/dcos_unit_check.sh'],
        name="check_dcos_master",
        description="Verify DCOS Master Services",
        context="dcos_master",
        unit=unit_name,
    )

def update_cell_db_if_ready(skip_acl_check=False, db_rid=None, unit=None):
    """Update the cells db if leader and dbs are already initialised"""
    if not is_leader():
        return

    if not is_db_initialised():
        log("Database not initialised - skipping cell db update",
            level=DEBUG)
        return

    allowed_units = relation_get('nova_allowed_units', rid=db_rid, unit=unit)
    if skip_acl_check or (allowed_units and local_unit() in
                          allowed_units.split()):
        update_cell_database()
    else:
        log('allowed_units either not presented, or local unit '
            'not in acl list: %s' % repr(allowed_units))

def is_new_dbinit_notification(init_id, echoed_init_id):
    """Returns True if we have received a new db initialisation notification
    from a peer unit and we have not previously echoed it to indicate that
    we have already performed the necessary actions as a result.

    Initialisation notification is expected to be of the format:

    <unit-id-leader-unit>-<shared-db-rel-id>-<uuid>

    @param init_id: received initialisation notification.
    @param echoed_init_id: value currently set for the echo key.
    @return: True if new notification and False if not.
    """
    shared_db_rel_id = (relation_ids('shared-db') or [None])[0]
    return (shared_db_rel_id and init_id and
            (local_unit() not in init_id) and
            (shared_db_rel_id in init_id) and
            (echoed_init_id != init_id))

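# Illustrative sketch (relation id, unit names and uuid are assumed): a
# notification is treated as "new" when it references our shared-db relation,
# was not sent by this unit, and has not already been echoed back.
def _example_is_new_notification():
    shared_db_rel_id = 'shared-db:5'            # hypothetical relation id
    local = 'neutron-api/1'                     # hypothetical local unit
    init_id = 'neutron-api-0-shared-db:5-1b2c'  # sent by the assumed leader
    echoed = None                               # nothing echoed yet
    return bool(init_id and local not in init_id and
                shared_db_rel_id in init_id and echoed != init_id)
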
def configure_vault_mysql(mysql):
    if local_unit() not in mysql.allowed_units():
        log("Deferring vault configuration until"
            " MySQL access is granted", level=DEBUG)
        return
    context = {
        'storage_name': 'mysql',
        'mysql_db_relation': mysql,
    }
    if mysql.ssl_ca():
        _db_tls_ca_file = "/var/snap/vault/common/db-tls-ca.pem"
        _db_tls_ca = base64.decodebytes(mysql.ssl_ca().encode())
        write_file(_db_tls_ca_file, _db_tls_ca, perms=0o600)
        context["tls_ca_file"] = _db_tls_ca_file
    configure_vault(context)

def __init__(self, name=None, mountpoint=None):
    if name is None:
        name = self._get_relation_name()
    super(StorageRelation, self).__init__(name)

    if mountpoint is None:
        mountpoint = os.path.join('/srv/',
                                  hookenv.local_unit().replace('/', '_'))
    self._requested_mountpoint = mountpoint

    if len(self.get('data', [])) == 0:
        self.mountpoint = None
    elif mountpoint == self['data'][0].get('mountpoint', None):
        self.mountpoint = mountpoint
    else:
        self.mountpoint = None

def _override(self):
    '''
    Return a dict of keys and values that will override puppet's defaults.
    '''
    override = {
        "hadoop_zookeeper::server::myid": local_unit().split("/")[1],
        "hadoop_zookeeper::server::ensemble": self.read_peers()
    }
    network_interface = config().get('network_interface')
    if network_interface:
        key = "hadoop_zookeeper::server::client_bind_addr"
        override[key] = Bigtop().get_ip_for_interface(network_interface)
    return override

def db_changed():
    if 'shared-db' not in CONFIGS.complete_contexts():
        juju_log('shared-db relation incomplete. Peer not ready?')
        return

    CONFIGS.write(CINDER_CONF)
    if is_elected_leader(CLUSTER_RES):
        # Bugs 1353135 & 1187508. Dbs can appear to be ready before the units
        # acl entry has been added. So, if the db supports passing a list of
        # permitted units then check if we're in the list.
        allowed_units = relation_get('allowed_units')
        if allowed_units and local_unit() in allowed_units.split():
            juju_log('Cluster leader, performing db sync')
            migrate_database()
        else:
            juju_log('allowed_units either not presented, or local unit '
                     'not in acl list: %s' % repr(allowed_units))

def read_peers(self):
    '''
    Fetch the list of peers available.

    The first item in this list should always be the node that this code
    is executing on.
    '''
    # A Zookeeper node likes to be first on the list.
    nodes = [(local_unit(), unit_private_ip())]
    # Get the list of peers
    zkpeer = RelationBase.from_state('zkpeer.joined')
    if zkpeer:
        nodes.extend(sorted(zkpeer.get_nodes()))
    nodes = [format_node(*node) for node in nodes]
    return nodes

def send_data(tls, kube_control):
    '''Send the data that is required to create a server certificate for
    this server.'''
    # Use the public ip of this unit as the Common Name for the certificate.
    common_name = hookenv.unit_public_ip()

    ingress_ip = get_ingress_address(kube_control)

    # Create SANs that the tls layer will add to the server cert.
    sans = [hookenv.unit_public_ip(), ingress_ip, gethostname()]

    # Create a path safe name by removing path characters from the unit name.
    certificate_name = hookenv.local_unit().replace('/', '_')

    # Request a server cert with this information.
    tls.request_server_cert(common_name, sans, certificate_name)

def get_previous_request(rid):
    """Return the last ceph broker request sent on a given relation

    @param rid: Relation id to query for request
    """
    request = None
    broker_req = relation_get(attribute='broker_req', rid=rid,
                              unit=local_unit())
    if broker_req:
        request_data = json.loads(broker_req)
        request = CephBrokerRq(api_version=request_data['api-version'],
                               request_id=request_data['request-id'])
        request.set_ops(request_data['ops'])

    return request

def get_internal_api_endpoints(relation=None):
    """
    Determine the best API endpoints for an internal client to connect to.

    If a relation is given, it will try to take that into account.

    May return an empty list if an endpoint is expected but not yet
    available.
    """
    try:
        goal_state = hookenv.goal_state()
    except NotImplementedError:
        goal_state = {}
    goal_state.setdefault("relations", {})

    # Config takes precedence.
    endpoints_from_config = get_endpoints_from_config()
    if endpoints_from_config:
        return endpoints_from_config

    # If the internal LB relation is attached, use that or nothing. If it's
    # not attached but the external LB relation is, use that or nothing.
    for lb_type in ("internal", "external"):
        lb_endpoint = "loadbalancer-" + lb_type
        request_name = "api-server-" + lb_type
        if lb_endpoint in goal_state["relations"]:
            lb_provider = endpoint_from_name(lb_endpoint)
            lb_response = lb_provider.get_response(request_name)
            if not lb_response or lb_response.error:
                return []
            return [(lb_response.address, STANDARD_API_PORT)]

    # Support the older loadbalancer relation (public-address interface).
    if "loadbalancer" in goal_state["relations"]:
        loadbalancer = endpoint_from_name("loadbalancer")
        lb_addresses = loadbalancer.get_addresses_ports()
        return [(host.get("public-address"), host.get("port"))
                for host in lb_addresses]

    # No LBs of any kind, so fall back to ingress-address.
    if not relation:
        kube_control = endpoint_from_name("kube-control")
        if not kube_control.relations:
            return []
        relation = kube_control.relations[0]
    ingress_address = hookenv.ingress_address(
        relation.relation_id, hookenv.local_unit()
    )
    return [(ingress_address, STANDARD_API_PORT)]

def prepare_tls_certificates(tls):
    common_name = hookenv.unit_public_ip()
    sans = set()
    sans.add(hookenv.unit_public_ip())
    sans.update(get_ingress_addresses('db'))
    sans.update(get_ingress_addresses('cluster'))
    sans.add(socket.gethostname())

    # add cluster peers as alt names when present
    cluster = endpoint_from_flag('cluster.joined')
    if cluster:
        for ip in cluster.get_db_ingress_addresses():
            sans.add(ip)

    sans = sorted(sans)
    certificate_name = hookenv.local_unit().replace('/', '_')
    tls.request_server_cert(common_name, sans, certificate_name)

def get_api_endpoint(relation=None):
    """
    Determine the best endpoint for a client to connect to.

    If a relation is given, it will take that into account when choosing an
    endpoint.
    """
    endpoints = get_lb_endpoints()
    if endpoints:
        # select a single endpoint based on our local unit number
        return endpoints[kubernetes_common.get_unit_number() % len(endpoints)]
    elif relation:
        ingress_address = hookenv.ingress_address(relation.relation_id,
                                                  hookenv.local_unit())
        return (ingress_address, STANDARD_API_PORT)
    else:
        return (hookenv.unit_public_ip(), STANDARD_API_PORT)

def _https_services_tcp(vip):
    name = local_unit().replace("/", "-")
    addr = common_utils.get_ip()
    return [
        {"service_name": "contrail-webui-https",
         "service_host": vip,
         "service_port": 8143,
         "service_options": [
             "mode tcp",
             "option tcplog",
             "balance source",
             "cookie SERVERID insert indirect nocache",
         ],
         "servers": [[
             name, addr, 8143,
             "cookie " + addr + " weight 1 maxconn 1024 check port 8143"]]},
    ]

def config_changed():
    utils.update_nrpe_config()
    if config.changed("control-network"):
        _update_cluster()
        if is_leader() and _address_changed(local_unit(),
                                            common_utils.get_ip()):
            _update_analyticsdb()

    docker_utils.config_changed()
    utils.update_charm_status()

    # leave it as latest - in case of exception in previous steps
    # config.changed doesn't work sometimes (when we saved config in this
    # hook before)
    if config.get("saved-image-tag") != config["image-tag"]:
        utils.update_ziu("image-tag")
        config["saved-image-tag"] = config["image-tag"]
        config.save()

def process_certificates(service_name, relation_id, unit,
                         custom_hostname_link=None, user='******',
                         group='root', bindings=None):
    """Process the certificates supplied down the relation

    :param service_name: str Name of service the certificates are for.
    :param relation_id: str Relation id providing the certs
    :param unit: str Unit providing the certs
    :param custom_hostname_link: str Name of custom link to create
    :param user: (Optional) Owner of certificate files. Defaults to 'root'
    :type user: str
    :param group: (Optional) Group of certificate files. Defaults to 'root'
    :type group: str
    :param bindings: List of bindings to check in addition to default api
                     bindings.
    :type bindings: list of strings
    :returns: True if certificates processed for local unit or False
    :rtype: bool
    """
    if bindings:
        # Add default API bindings to bindings list
        bindings = list(bindings + get_default_api_bindings())
    else:
        # Use default API bindings
        bindings = get_default_api_bindings()

    data = relation_get(rid=relation_id, unit=unit)
    ssl_dir = os.path.join('/etc/apache2/ssl/', service_name)
    mkdir(path=ssl_dir)
    name = local_unit().replace('/', '_')
    certs = data.get('{}.processed_requests'.format(name))
    chain = data.get('chain')
    ca = data.get('ca')
    if certs:
        certs = json.loads(certs)
        _manage_ca_certs(ca, relation_id)
        install_certs(ssl_dir, certs, chain, user=user, group=group)
        create_ip_cert_links(
            ssl_dir,
            custom_hostname_link=custom_hostname_link,
            bindings=bindings)
        return True
    return False

def zap():
    if not hookenv.action_get('i-really-mean-it'):
        hookenv.action_fail('i-really-mean-it is a required parameter')
        return

    failed_devices = []
    not_block_devices = []
    try:
        devices = get_devices()
    except ZapDiskError as error:
        hookenv.action_fail("Failed due to: {}".format(error))
        return

    for device in devices:
        if not is_block_device(device):
            not_block_devices.append(device)
        if (is_device_mounted(device) or
                is_active_bluestore_device(device) or
                is_mapped_luks_device(device)):
            failed_devices.append(device)

    if failed_devices or not_block_devices:
        message = ""
        if failed_devices:
            message = "{} devices are mounted: {}".format(
                len(failed_devices),
                ", ".join(failed_devices))
        if not_block_devices:
            if len(message):
                message += "\n\n"
            message += "{} devices are not block devices: {}".format(
                len(not_block_devices),
                ", ".join(not_block_devices))
        hookenv.action_fail(message)
        return

    db = kv()
    used_devices = db.get('osd-devices', [])
    for device in devices:
        zap_disk(device)
        if device in used_devices:
            used_devices.remove(device)
    db.set('osd-devices', used_devices)
    db.flush()
    hookenv.action_set({
        'message': "{} disk(s) have been zapped, to use them as OSDs, run: \n"
                   "juju run-action {} add-disk osd-devices=\"{}\"".format(
                       len(devices),
                       hookenv.local_unit(),
                       " ".join(devices))
    })

def setup_nagios(nagios):
    """Setup nagios check"""
    conf = config()
    unit_name = local_unit()
    check_base = '/usr/lib/nagios/plugins/'
    process_check = check_base + 'check_procs'

    web_check = [process_check, '-c', '1:1', '-a', 'bin/redis-server']
    nagios.add_check(web_check,
                     name="redis-serverprocess",
                     description="Check for redis-server process",
                     context=conf['nagios_context'],
                     servicegroups=conf['nagios_servicegroups'],
                     unit=unit_name)

    set_flag('redis.nagios-setup.complete')

def get_config():
    '''Gather and sanity-check volume configuration data'''
    volume_config = {}
    config = hookenv.config()

    errors = False

    if config.get('volume-ephemeral') in (True, 'True', 'true', 'Yes', 'yes'):
        volume_config['ephemeral'] = True
    else:
        volume_config['ephemeral'] = False

    try:
        volume_map = yaml.safe_load(config.get('volume-map', '{}'))
    except yaml.YAMLError as e:
        hookenv.log("Error parsing YAML volume-map: {}".format(e),
                    hookenv.ERROR)
        errors = True

    if volume_map is None:
        # probably an empty string
        volume_map = {}
    elif not isinstance(volume_map, dict):
        hookenv.log("Volume-map should be a dictionary, not {}".format(
            type(volume_map)))
        errors = True

    volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME'])
    if volume_config['device'] and volume_config['ephemeral']:
        # asked for ephemeral storage but also defined a volume ID
        hookenv.log(
            'A volume is defined for this unit, but ephemeral '
            'storage was requested',
            hookenv.ERROR)
        errors = True
    elif not volume_config['device'] and not volume_config['ephemeral']:
        # asked for permanent storage but did not define volume ID
        hookenv.log(
            'Permanent storage was requested, but there is no volume '
            'defined for this unit.',
            hookenv.ERROR)
        errors = True

    unit_mount_name = hookenv.local_unit().replace('/', '-')
    volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name)

    if errors:
        return None
    return volume_config

def cluster_changed():
    unison.ssh_authorized_peers(user=SSH_USER,
                                group=SSH_USER,
                                peer_interface='cluster',
                                ensure_local_user=True)

    # NOTE(jamespage) re-echo passwords for peer storage
    echo_whitelist = [
        '_passwd',
        'identity-service:',
        'db-initialised',
        'ssl-cert-available-updates'
    ]
    # Don't echo if leader since a re-election may be in progress.
    if not is_leader():
        echo_whitelist.append('ssl-cert-master')

    log("Peer echo whitelist: %s" % (echo_whitelist), level=DEBUG)
    peer_echo(includes=echo_whitelist, force=True)

    check_peer_actions()

    initialise_pki()

    if is_leader():
        # Figure out if we need to mandate a sync
        units = get_ssl_sync_request_units()
        synced_units = relation_get_and_migrate(attribute='ssl-synced-units',
                                                unit=local_unit())
        diff = None
        if synced_units:
            synced_units = json.loads(synced_units)
            diff = set(units).symmetric_difference(set(synced_units))
    else:
        units = None

    if units and (not synced_units or diff):
        log("New peers joined and need syncing - %s" %
            (', '.join(units)), level=DEBUG)
        update_all_identity_relation_units_force_sync()
    else:
        update_all_identity_relation_units()

    if not is_leader() and is_ssl_cert_master():
        # Force a sync and trigger a sync master re-election since we are
        # not leader anymore.
        force_ssl_sync()
    else:
        CONFIGS.write_all()

def conditional_neutron_migration():
    if os_release('neutron-common') < 'kilo':
        log('Not running neutron database migration as migrations are '
            'handled by the neutron-server process or nova-cloud-controller '
            'charm.')
        return

    if is_elected_leader(CLUSTER_RES):
        allowed_units = relation_get('allowed_units')
        if allowed_units and local_unit() in allowed_units.split():
            migrate_neutron_database()
            service_restart('neutron-server')
        else:
            log('Not running neutron database migration, either no'
                ' allowed_units or this unit is not present')
            return
    else:
        log('Not running neutron database migration, not leader')

def conditional_neutron_migration():
    if CompareOpenStackReleases(os_release('neutron-server')) <= 'icehouse':
        log('Not running neutron database migration as migrations are '
            'handled by the neutron-server process.')
        return

    if is_elected_leader(CLUSTER_RES):
        allowed_units = relation_get('allowed_units')
        if allowed_units and local_unit() in allowed_units.split():
            migrate_neutron_database()
            if not is_unit_paused_set():
                service_restart('neutron-server')
        else:
            log('Not running neutron database migration, either no'
                ' allowed_units or this unit is not present')
            return
    else:
        log('Not running neutron database migration, not leader')

def _save_state(self):
    self.msg('Publishing state'.format(self._name()))
    if hookenv.is_leader():
        # sort_keys to ensure stability.
        raw = json.dumps(self.grants, sort_keys=True)
        hookenv.leader_set({self.key: raw})

    local_unit = hookenv.local_unit()

    if self.relid is None:
        # No peers relation yet. Fallback to local state.
        self.msg('No peer relation. Saving local state')
        self._save_local_state(self.requests[local_unit])
    else:
        # sort_keys to ensure stability.
        raw = json.dumps(self.requests[local_unit], sort_keys=True)
        hookenv.relation_set(self.relid,
                             relation_settings={self.key: raw})

def mark_broker_action_done(action, rid=None, unit=None):
    """Mark action as having been completed.

    @param action: name of action to be performed
    @returns None
    """
    rdata = relation_get(rid, unit) or {}
    broker_rsp = rdata.get(get_broker_rsp_key())
    if not broker_rsp:
        return

    rsp = CephBrokerRsp(broker_rsp)
    unit_name = local_unit().partition('/')[2]
    key = "unit_{}_ceph_broker_action.{}".format(unit_name, action)
    kvstore = kv()
    kvstore.set(key=key, value=rsp.request_id)
    kvstore.flush()

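# Illustrative sketch (unit and action names are assumed): the completed
# action is recorded in the local kv store under a key derived from the unit
# number and the action name, mirroring the key format used above.
def _example_broker_action_key(action='example-action'):
    unit_number = 'ceph-osd/2'.partition('/')[2]  # hypothetical unit -> '2'
    return "unit_{}_ceph_broker_action.{}".format(unit_number, action)
    # -> 'unit_2_ceph_broker_action.example-action'
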
def https_services_tcp(service_name, vip, port):
    name = local_unit().replace("/", "-")
    addr = get_ip()
    return [
        {"service_name": service_name,
         "service_host": vip,
         "service_port": port,
         "service_options": [
             "mode tcp",
             "option tcplog",
             "balance source",
             "cookie SERVERID insert indirect nocache",
         ],
         "servers": [[
             name, addr, port,
             "cookie " + addr + " weight 1 maxconn 1024 check port " +
             str(port)]]},
    ]

def reset_default_password():
    if hookenv.leader_get('default_admin_password_changed'):
        hookenv.log('Default admin password already changed')
        return

    # Cassandra ships with well known credentials, rather than
    # providing a tool to reset credentials. This is a huge security
    # hole we must close.
    try:
        # We need a big timeout here, as the cassandra user actually
        # springs into existence some time after Cassandra has started
        # up and is accepting connections.
        with helpers.connect('cassandra', 'cassandra',
                             timeout=120, auth_timeout=120) as session:
            # But before we close this security hole, we need to use these
            # credentials to create a different admin account for the
            # leader, allowing it to create accounts for other nodes as they
            # join. The alternative is restarting Cassandra without
            # authentication, which this charm will likely need to do in the
            # future when we allow Cassandra services to be related together.
            helpers.status_set('maintenance',
                               'Creating initial superuser account')
            username, password = helpers.superuser_credentials()
            pwhash = helpers.encrypt_password(password)
            helpers.ensure_user(session, username, pwhash, superuser=True)
            helpers.set_unit_superusers([hookenv.local_unit()])

            helpers.status_set('maintenance',
                               'Changing default admin password')
            helpers.query(session, 'ALTER USER cassandra WITH PASSWORD %s',
                          cassandra.ConsistencyLevel.ALL, (host.pwgen(),))
    except cassandra.AuthenticationFailed:
        hookenv.log('Default superuser account already reset')
        try:
            with helpers.connect():
                hookenv.log("Leader's superuser account already created")
        except cassandra.AuthenticationFailed:
            # We have no known superuser credentials. Create the account
            # the hard, slow way. This will be the normal method
            # of creating the service's initial account when we allow
            # services to be related together.
            helpers.create_unit_superuser_hard()

    hookenv.leader_set(default_admin_password_changed=True)

def setup_sensu(info):
    application_version_set('0.29')
    rabbitmq = {'host': config()['rabbitmq'].split(':')[0],
                'port': config()['rabbitmq'].split(':')[1],
                'password': config()['password']}
    if config()['ssl_key'] != '':
        if not os.path.isdir(SSL_DIR):
            os.mkdir(SSL_DIR)
        with open('{}/ssl_key.pem'.format(SSL_DIR), 'w+') as ssl_key:
            ssl_key.write(config()['ssl_key'])
        with open('{}/ssl_cert.pem'.format(SSL_DIR), 'w+') as ssl_cert:
            ssl_cert.write(config()['ssl_cert'])
        rabbitmq['ssl_cert'] = '{}/ssl_cert.pem'.format(SSL_DIR)
        rabbitmq['ssl_key'] = '{}/ssl_key.pem'.format(SSL_DIR)
    name = '{}/{}'.format(os.environ['JUJU_MODEL_NAME'],
                          os.environ['JUJU_MACHINE_ID'])
    application = os.environ['JUJU_REMOTE_UNIT']
    unit = local_unit().replace('/', '-')
    render('rabbitmq.json', '{}/rabbitmq.json'.format(CONFIG_DIR),
           context=rabbitmq)
    client = {'name': name,
              'public_ip': unit_public_ip(),
              'subscriptions': '[\"monitoring\"]'}
    render('client.json', '{}/client.json'.format(CONFIG_DIR),
           context=client)
    render('transport.json', '{}/transport.json'.format(CONFIG_DIR),
           context={})
    for plugin in config()['plugins'].split(' '):
        try:
            check_call(['sensu-install', '-p', plugin])
        except CalledProcessError as e:
            status_set('blocked', e.output)
    if not os.path.isdir(os.path.join(CONFIG_DIR, unit)):
        os.mkdir(os.path.join(CONFIG_DIR, unit))
    measurements = config()['measurements'].split(' ')
    try:
        checks = [
            {'type': m.split('|')[0],
             'script': m.split('|')[1],
             'subscribers': application}
            for m in measurements
        ]
        render('checks.json', '{}/{}/checks.json'.format(CONFIG_DIR, unit),
               context={'checks': checks})
        # When multiple sensu-clients are being used to monitor different
        # sets of measurements, open_port would fail. The downside of this
        # check is that juju status will not provide correct info about
        # open ports, except for the first Sensu client.
        try:
            open_port(3030)
        except CalledProcessError:
            pass
        service_restart('sensu-client')
        status_set('active', 'active (ready)')
        set_state('sensu.installed')
    except IndexError:
        status_set('blocked', 'Incorrect checks given in config')