def __init__(self):
    """Gather the unit's etcd settings into attributes of this object.

    Values are read from the unit key/value store, the charm config, the
    "cluster" and "db" network bindings, the JUJU_UNIT_NAME environment
    variable and the "tls-client" / "etcd" layer options.
    """
    self.db = unitdata.kv()

    # Bind addresses and the configured client / management ports.
    self.cluster_bind_address = self.get_bind_address("cluster")
    self.db_bind_address = self.get_bind_address("db")
    self.port = config("port")
    self.management_port = config("management_port")

    # Addresses are looked up each time an instance is built.
    self.public_address = unit_get("public-address")
    self.cluster_address = get_ingress_address("cluster")
    self.db_address = get_ingress_address("db")
    juju_unit = os.getenv("JUJU_UNIT_NAME")
    # The unit name is stored without its "/" separator.
    self.unit_name = juju_unit.replace("/", "")

    # TLS file locations come from the tls-client layer options.
    tls_layer = layer.options("tls-client")
    ca_file, cert_file, key_file = [
        tls_layer[option]
        for option in (
            "ca_certificate_path",
            "server_certificate_path",
            "server_key_path",
        )
    ]

    # Static etcd settings come from the etcd layer options.
    etcd_layer = layer.options("etcd")
    self.etcd_conf_dir = etcd_layer["etcd_conf_dir"]
    # storage_path() picks the data directory; per the original author's
    # note it depends on whether durable storage is mounted.
    self.etcd_data_dir = self.storage_path()
    self.etcd_daemon = etcd_layer["etcd_daemon_process"]

    self.ca_certificate = ca_file
    self.server_certificate = cert_file
    self.server_key = key_file

    # Cluster membership details, with defaults when nothing is stored yet.
    self.cluster = self.db.get("etcd.cluster", "")
    self.token = self.cluster_token()
    self.cluster_state = self.db.get("etcd.cluster-state", "existing")
def __init__(self):
    ''' Gather the unit's etcd settings into attributes of this object.

    Values are read from the unit key/value store, the charm config, the
    'cluster' and 'db' network bindings, the JUJU_UNIT_NAME environment
    variable and the 'tls-client' / 'etcd' layer options. '''
    self.db = unitdata.kv()

    # Bind addresses and the configured client / management ports.
    self.cluster_bind_address = self.get_bind_address('cluster')
    self.db_bind_address = self.get_bind_address('db')
    self.port = config('port')
    self.management_port = config('management_port')

    # Addresses are looked up each time an instance is built.
    self.public_address = unit_get('public-address')
    self.cluster_address = get_ingress_address('cluster')
    self.db_address = get_ingress_address('db')
    juju_unit = os.getenv('JUJU_UNIT_NAME')
    # The unit name is stored without its '/' separator.
    self.unit_name = juju_unit.replace('/', '')

    # TLS file locations come from the tls-client layer options.
    tls_layer = layer.options('tls-client')
    ca_file, cert_file, key_file = [
        tls_layer[option]
        for option in (
            'ca_certificate_path',
            'server_certificate_path',
            'server_key_path',
        )
    ]

    # Static etcd settings come from the etcd layer options.
    etcd_layer = layer.options('etcd')
    self.etcd_conf_dir = etcd_layer['etcd_conf_dir']
    # storage_path() picks the data directory; per the original author's
    # note it depends on whether durable storage is mounted.
    self.etcd_data_dir = self.storage_path()
    self.etcd_daemon = etcd_layer['etcd_daemon_process']

    self.ca_certificate = ca_file
    self.server_certificate = cert_file
    self.server_key = key_file

    # Cluster concerns: the token is computed, the state starts as 'existing'.
    self.token = self.cluster_token()
    self.cluster_state = 'existing'
def prepare_tls_certificates(tls):
    ''' Request a server certificate for this unit through ``tls``.

    The unit's public IP is used as the common name. The subject alternative
    names are the public IP, the 'db' and 'cluster' ingress addresses and the
    local hostname, de-duplicated and sorted.

    :param tls: object exposing
        ``request_server_cert(common_name, sans, certificate_name)``.
    '''
    status_set('maintenance', 'Requesting tls certificates.')
    common_name = hookenv.unit_public_ip()
    candidates = {
        common_name,
        get_ingress_address('db'),
        get_ingress_address('cluster'),
        socket.gethostname(),
    }
    # A set iterates in hash order, which for strings can differ from one
    # interpreter process to the next. Sorting keeps the request stable
    # across runs; empty/None entries are dropped because they are not
    # usable names (and None cannot be ordered against str).
    sans = sorted(name for name in candidates if name)
    # Certificate names use '_' in place of the '/' in the unit name.
    certificate_name = hookenv.local_unit().replace('/', '_')
    tls.request_server_cert(common_name, sans, certificate_name)
def leader_config_changed():
    ''' Apply a changed client or management port from the leader unit.

    Rewrites every member's peer URLs with the new management port, renders
    the configuration, closes and opens ports, republishes the leader address
    and restarts the etcd service. Nothing beyond logging happens unless both
    previous port values are known. '''
    charm_config = hookenv.config()
    old_client_port = charm_config.previous('port')
    log('Previous port: {0}'.format(old_client_port))
    old_mgmt_port = charm_config.previous('management_port')
    log('Previous management port: {0}'.format(old_mgmt_port))

    if not (old_client_port and old_mgmt_port):
        return

    bag = EtcdDatabag()
    etcdctl = EtcdCtl()
    members = etcdctl.member_list()
    for unit_name in members:
        member = members[unit_name]
        # Swap the old management port for the new one in the peer urls.
        current_urls = member['peer_urls']
        log('Previous peer url: {0}'.format(current_urls))
        stale_suffix = ':{0}'.format(old_mgmt_port)
        fresh_suffix = ':{0}'.format(charm_config.get('management_port'))
        updated_urls = current_urls.replace(stale_suffix, fresh_suffix)
        # Push the rewritten peer urls to the member and log the response.
        log(etcdctl.member_update(member['unit_id'], updated_urls))

    # Render only the leader's configuration with the new values.
    render_config()
    # Close the previous client port and open the new one.
    close_open_ports()
    address = get_ingress_address('cluster')
    leader_address = get_connection_string([address], bag.management_port)
    leader_set({'leader_address': leader_address})
    host.service_restart(bag.etcd_daemon)
def register_prometheus_jobs():
    """Register the etcd scrape job with the related Prometheus endpoint.

    Targets are the peers' db ingress addresses (when the cluster endpoint is
    available) plus this unit's own, each paired with the configured port.
    """
    # Deliberately not guarded with `when_not("prometheus.configured")`:
    # etcd unit IP addresses can change and units can be added or removed.
    # Per the original author's note, repeated `prometheus.register_job()`
    # calls have no effect unless job_data changes.
    log("Registering Prometheus metrics collection.")
    prometheus = endpoint_from_flag("endpoint.prometheus.joined")
    cluster = endpoint_from_flag("cluster.joined")

    if cluster:
        peer_ips = cluster.get_db_ingress_addresses()
    else:
        peer_ips = []
    # The peer list does not include this unit, so add it.
    peer_ips.append(get_ingress_address("db"))

    targets = []
    for ip in peer_ips:
        targets.append("{}:{}".format(ip, config("port")))
    log("Configuring Prometheus scrape targets: {}".format(targets), DEBUG)

    job_data = {
        "scheme": "https",
        "static_configs": [
            {"targets": targets},
        ],
    }
    prometheus.register_job(job_name="etcd", job_data=job_data)
    set_flag("prometheus.configured")
def initialize_new_leader():
    ''' Bring up a single member etcd cluster on this unit and publish the
    leadership data (token, leader address, cluster string) that followers
    use to join it. Sets the unit to blocked and stops early if the cluster
    reports unhealthy after the restart. '''
    bag = EtcdDatabag()
    # Self-assignment kept from the original.
    # NOTE(review): looks like a no-op unless `token` is a property - confirm.
    bag.token = bag.token
    bag.cluster_state = 'new'

    address = get_ingress_address('cluster')
    peer_url = get_connection_string([address], bag.management_port)
    bag.cluster = "{}={}".format(bag.unit_name, peer_url)

    render_config(bag)
    host.service_restart(bag.etcd_daemon)
    # Some hosts need a pause here: the charm otherwise races systemd.
    time.sleep(2)

    # Only announce the leader once the cluster reports healthy.
    health = EtcdCtl().cluster_health()
    if 'unhealthy' in health:
        status_set('blocked', 'Cluster not healthy.')
        return

    # Healthy leader: broadcast the initial data points for followers.
    open_port(bag.port)
    client_url = get_connection_string([address], bag.port)
    leader_data = {
        'token': bag.token,
        'leader_address': client_url,
        'cluster': bag.cluster,
    }
    leader_set(leader_data)

    # Bootstrap finished; mark the leader as configured.
    set_state('etcd.leader.configured')
def initialize_new_leader():
    """Bring up a single member etcd cluster and publish leadership data.

    Builds the initial cluster string for this unit, renders the config,
    restarts etcd and, once the cluster reports healthy, opens the client
    port and sets the leader address and cluster string for followers.
    Reports a blocked status and returns early if the cluster is unhealthy.
    """
    bag = EtcdDatabag()
    # Self-assignment kept from the original.
    # NOTE(review): looks like a no-op unless `token` is a property - confirm.
    bag.token = bag.token
    bag.set_cluster_state("new")
    address = get_ingress_address("cluster")
    cluster_connection_string = get_connection_string(
        [address], bag.management_port
    )
    bag.set_cluster("{}={}".format(bag.unit_name, cluster_connection_string))

    render_config(bag)
    host.service_restart(bag.etcd_daemon)

    # sorry, some hosts need this. The charm races with systemd and wins.
    time.sleep(2)

    # Check health status before we say we are good.
    etcdctl = EtcdCtl()
    # The result must not be bound to the name `status`: that would shadow
    # the `status` helper used below and make `status.blocked(...)` an
    # attribute lookup on the health output instead.
    # NOTE(review): assumes `status` is a module-level name in this file.
    cluster_health = etcdctl.cluster_health()
    if "unhealthy" in cluster_health:
        status.blocked("Cluster not healthy.")
        return

    # We have a healthy leader, broadcast initial data-points for followers.
    open_port(bag.port)
    leader_connection_string = get_connection_string([address], bag.port)
    leader_set(
        {"leader_address": leader_connection_string, "cluster": bag.cluster}
    )

    # Set registered state since if we ever become a follower, we will not
    # need to re-register.
    set_state("etcd.registered")

    # Finish bootstrap delta and set configured state.
    set_state("etcd.leader.configured")
def send_single_connection_details(db):
    ''' Publish the client key, certificate and CA on the db relation,
    followed by a connection string that contains only this unit's db
    ingress address on the configured port, plus the etcd version. '''
    cert, key, ca = [
        read_tls_cert(filename)
        for filename in ('client.crt', 'client.key', 'ca.crt')
    ]
    etcdctl = EtcdCtl()

    # Hand the client credentials to the db relation.
    db.set_client_credentials(key, cert, ca)

    bag = EtcdDatabag()
    # This unit is the only member in the connection string.
    own_address = get_ingress_address('db')
    connection_string = get_connection_string([own_address], bag.port)

    # Publish the connection string together with the etcd version.
    etcd_version = etcdctl.version()
    db.set_connection_string(connection_string, version=etcd_version)
def send_cluster_connection_details(cluster, db):
    ''' Publish the client key, certificate and CA on the db relation,
    followed by a connection string covering every cluster peer and this
    unit on the configured port, plus the etcd version. '''
    cert, key, ca = [
        read_tls_cert(filename)
        for filename in ('client.crt', 'client.key', 'ca.crt')
    ]
    etcdctl = EtcdCtl()

    # Hand the key, cert and ca to the db relation.
    db.set_client_credentials(key, cert, ca)

    client_port = hookenv.config().get('port')

    # Peers reported on the cluster relation do not include this unit, so
    # add our own db address before sorting the membership list.
    members = cluster.get_db_ingress_addresses()
    members.append(get_ingress_address('db'))
    members.sort()

    # One connection string for all members on the configured port.
    connection_string = get_connection_string(members, client_port)

    # Publish the connection string together with the etcd version.
    etcd_version = etcdctl.version()
    db.set_connection_string(connection_string, version=etcd_version)
def set_db_ingress_address(cluster):
    ''' Share this unit's 'db' ingress address with its peers by setting it
    on the cluster relation. '''
    cluster.set_db_ingress_address(get_ingress_address('db'))
def set_db_ingress_address(cluster):
    """Share this unit's "db" ingress address with its peers by setting it
    on the cluster relation."""
    cluster.set_db_ingress_address(get_ingress_address("db"))
import sys import time import yaml opts = layer.options('etcd') DATESTAMP = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') ARCHIVE = "etcd-data-{}.tar.gz".format(DATESTAMP) unit_name = os.getenv('JUJU_UNIT_NAME').replace('/', '') ETCD_DATA_DIR = '{}/{}.etcd'.format(opts['etcd_data_dir'], unit_name) if not os.path.isdir(ETCD_DATA_DIR): ETCD_DATA_DIR = opts['etcd_data_dir'] ETCD_PORT = config('management_port') CLUSTER_ADDRESS = get_ingress_address('cluster') SKIP_BACKUP = action_get('skip-backup') SNAPSHOT_ARCHIVE = resource_get('snapshot') TARGET_PATH = action_get('target') def preflight_check(): ''' Check preconditions for data restoration ''' if not SNAPSHOT_ARCHIVE: action_fail({'result.failed': 'Missing snapshot. See: README.md'}) sys.exit(0) def render_backup(): ''' Backup existing data in the event of restoration on a dirty unit. ''' if not os.path.isdir(ETCD_DATA_DIR) and SKIP_BACKUP:
def register_node_with_leader(cluster):
    ''' Register this unit as a member of the etcd cluster via the leader.

    Control flow mechanism to perform self registration with the leader.

    Before executing self registration, we must adhere to the nature of
    offline static turnup rules. If we find a GUID in the member list without
    peering information the unit will enter a race condition and must wait
    for a clean status output before we can progress to self registration.

    The function returns early, without setting 'etcd.registered', when:
    the member list cannot be fetched, any listed member has no
    'client_urls' yet, registration yields no usable response, or the
    cluster reports unhealthy after the restart.

    :param cluster: not referenced in the function body.
    '''
    # We're going to communicate with the leader, and we need our bootstrap
    # startup string once.. TBD after that.
    etcdctl = EtcdCtl()
    bag = EtcdDatabag()
    # Assume a hiccup during registration and attempt a retry: a cached
    # unit id means an earlier run already registered, so reuse the peer
    # string cached at that time and render the config again.
    if bag.cluster_unit_id:
        bag.cluster = bag.registration_peer_string
        render_config(bag)
        time.sleep(2)

    try:
        peers = etcdctl.member_list(leader_get('leader_address'))
    except CalledProcessError:
        log("Etcd attempted to invoke registration before service ready")
        # This error state is transient, and does not imply the unit is
        # broken. Erroring at this stage can be resolved, and should not
        # affect the overall condition of unit turn-up. Return from the
        # method and let the charm re-invoke on next run.
        return

    for unit in peers:
        # A member without client urls has not completed registration.
        if 'client_urls' not in peers[unit].keys():
            msg = 'Waiting for unit to complete registration.'
            if ('peer_urls' in peers[unit].keys() and
                    peers[unit]['peer_urls'] and
                    get_ingress_address('cluster') in peers[unit]['peer_urls'] and  # noqa
                    not host.service_running(bag.etcd_daemon)):
                # We have a peer that is unstarted and it is this node.
                # We do not run etcd now. Instead of blocking everyone
                # try to self-unregister.
                try:
                    leader_address = leader_get('leader_address')
                    msg = 'Etcd service did not start. Will retry soon.'
                    etcdctl.unregister(peers[unit]['unit_id'], leader_address)
                except CalledProcessError:
                    # Best effort: a failed unregister is only logged.
                    log('Notice: Unit failed to unregister', 'WARNING')
            # We cannot register. State not attainable.
            status_set('waiting', msg)
            return

    if not bag.cluster_unit_id:
        bag.leader_address = leader_get('leader_address')
        # The databag's attributes are handed to register() as a dict.
        resp = etcdctl.register(bag.__dict__)
        if resp and 'cluster_unit_id' in resp.keys() and 'cluster' in resp.keys():  # noqa
            # Cache the registration result so a later run can take the
            # retry path at the top of this function.
            bag.cache_registration_detail('cluster_unit_id',
                                          resp['cluster_unit_id'])
            bag.cache_registration_detail('registration_peer_string',
                                          resp['cluster'])

            bag.cluster_unit_id = resp['cluster_unit_id']
            bag.cluster = resp['cluster']
        else:
            log('etcdctl.register failed, will retry')
            msg = 'Waiting to retry etcd registration'
            status_set('waiting', msg)
            return

    render_config(bag)
    host.service_restart(bag.etcd_daemon)
    # Pause after the restart before querying cluster health.
    time.sleep(2)

    # Check health status before we say we are good
    etcdctl = EtcdCtl()
    status = etcdctl.cluster_health()
    if 'unhealthy' in status:
        status_set('blocked', 'Cluster not healthy.')
        return
    open_port(bag.port)
    set_state('etcd.registered')