def push_filebeat_index(elasticsearch):
    """Create the Filebeat index in Elasticsearch.

    Once elasticsearch is available, make 5 attempts to create a filebeat
    index. Set appropriate charm status so the operator knows when ES is
    configured to accept data.

    :param elasticsearch: relation endpoint; ``list_unit_data()`` must yield
        mappings that carry ``host`` and ``port`` keys.
    :return: None
    """
    # Only one endpoint is targeted: the last unit reported by the relation.
    host_string = None
    for host in elasticsearch.list_unit_data():
        host_string = "{}:{}".format(host['host'], host['port'])

    if host_string is None:
        # No unit data published yet, so there is nothing to push to.
        status.waiting('Waiting for elasticsearch unit data.')
        return

    max_attempts = 5
    # range() excludes its stop value; +1 so all max_attempts tries happen.
    for i in range(1, max_attempts + 1):
        if push_beat_index(elasticsearch=host_string,
                           service='filebeat', fatal=False):
            set_state('filebeat.index.pushed')
            status.active('Filebeat ready.')
            break
        if i < max_attempts:
            # Only announce a retry (and wait) when another attempt follows.
            msg = "Attempt {} to push filebeat index failed (retrying)".format(
                i)
            status.waiting(msg)
            time.sleep(i * 30)  # back off 30s for each attempt
    else:
        # Reached only when no attempt succeeded (the loop never hit break).
        msg = "Failed to push filebeat index to http://{}".format(host_string)
        status.blocked(msg)
def configure_bgp_globals():
    """Apply the charm's global BGP options to the 'default' BGPConfiguration.

    The existing resource is fetched with calicoctl; if calicoctl reports
    that it does not exist, a fresh skeleton is built instead. The
    'global-as-number' and 'node-to-node-mesh' config values are written
    into its spec and the resource is applied. On any other calicoctl
    failure the traceback is logged, the unit goes to waiting, and the
    'calico.bgp.globals.configured' state is left unset.
    """
    status.maintenance('Configuring BGP globals')
    settings = charm_config()

    try:
        try:
            bgp_resource = calicoctl_get('bgpconfig', 'default')
        except CalledProcessError as err:
            # Anything other than "missing resource" is a real failure.
            if b'resource does not exist' not in err.output:
                raise
            log('default BGPConfiguration does not exist')
            bgp_resource = {
                'apiVersion': 'projectcalico.org/v3',
                'kind': 'BGPConfiguration',
                'metadata': {
                    'name': 'default'
                },
                'spec': {}
            }

        bgp_spec = bgp_resource['spec']
        bgp_spec['asNumber'] = settings['global-as-number']
        bgp_spec['nodeToNodeMeshEnabled'] = settings['node-to-node-mesh']
        calicoctl_apply(bgp_resource)
    except CalledProcessError:
        log(traceback.format_exc())
        status.waiting('Waiting to retry BGP global configuration')
        return

    set_state('calico.bgp.globals.configured')
def configure_cni():
    ''' Render the Canal CNI conflist and publish it over the cni relation.

    Waits (without setting any state) while the Flannel subnet is not yet
    known. Otherwise writes /etc/cni/net.d/10-canal.conflist from the
    template, hands the CIDR and conf file name to the cni endpoint, and
    sets 'canal.cni.configured'.
    '''
    status.maintenance('Configuring Calico CNI')

    try:
        flannel_subnet = get_flannel_subnet()
    except FlannelSubnetNotFound:
        hookenv.log(traceback.format_exc())
        status.waiting('Waiting for Flannel')
        return

    conf_dir = '/etc/cni/net.d'
    conf_name = '10-canal.conflist'
    os.makedirs(conf_dir, exist_ok=True)

    cni = endpoint_from_flag('cni.connected')
    etcd = endpoint_from_flag('etcd.available')
    cni_config = cni.get_config()

    template_vars = {
        'connection_string': etcd.get_connection_string(),
        'etcd_key_path': ETCD_KEY_PATH,
        'etcd_cert_path': ETCD_CERT_PATH,
        'etcd_ca_path': ETCD_CA_PATH,
        # Fall back to the default kubeconfig when the relation gives none.
        'kubeconfig_path': cni_config.get('kubeconfig_path',
                                          '/root/cdk/kubeconfig'),
        'subnet': flannel_subnet,
    }
    render(conf_name, conf_dir + '/' + conf_name, template_vars)

    cni.set_config(cidr=config('cidr'), cni_conf_file=conf_name)
    set_state('canal.cni.configured')
def invoke_configure_network(etcd):
    ''' Run flannel network configuration and adjust states to match.

    When configure_network(etcd) returns a truthy value, the
    'flannel.network.configured' state is set and 'flannel.service.started'
    is removed. Otherwise the unit reports that it is waiting on etcd.
    '''
    status.maintenance('Negotiating flannel network subnet.')

    if not configure_network(etcd):
        status.waiting('Waiting on etcd.')
        return

    set_state('flannel.network.configured')
    remove_state('flannel.service.started')
def setup_rabbitmq():
    """Request a RabbitMQ user and vhost for this application.

    The username is the application part of JUJU_UNIT_NAME (the text before
    the '/'). The vhost is '/<model>.<application>', with any '/' in
    MODEL_NAME replaced by '-'.
    """
    rabbitmq = endpoint_from_flag('rabbitmq.connected')

    model_part = MODEL_NAME.replace("/", '-')
    app_part = JUJU_UNIT_NAME.split('/')[0]
    vhost = '/' + model_part + '.' + app_part

    rabbitmq.request_access(app_part, vhost)
    status.waiting('Waiting on RabbitMQ to configure vhost')
def ready():
    ''' Report canal as active, or say what it is still waiting for.

    Failing services take priority; after that the Flannel subnet must be
    resolvable before the unit is marked active.
    '''
    failing = get_failing_services()
    if len(failing) > 0:
        status.waiting('Waiting for service: {}'.format(', '.join(failing)))
        return

    try:
        subnet = get_flannel_subnet()
    except FlannelSubnetNotFound:
        status.waiting('Waiting for Flannel')
    else:
        status.active('Flannel subnet ' + subnet)
def configure_calico_pool():
    ''' Reconcile Calico IP pools with the comma-separated 'cidr' config.

    Does nothing except set 'calico.pool.configured' when 'manage-pools' is
    disabled. Otherwise every existing pool whose name or CIDR is not among
    the expected ones is deleted, and one pool per configured CIDR (named
    'ipv<version>') is applied. On a calicoctl failure the unit is blocked
    if both ipip and vxlan are enabled, else set to waiting; the state is
    not set in either case.
    '''
    config = charm_config()
    if not config['manage-pools']:
        log('Skipping pool configuration')
        set_state('calico.pool.configured')
        return

    status.maintenance('Configuring Calico IP pool')

    try:
        existing_pools = calicoctl_get('pool')['items']
        cidrs = tuple(part.strip() for part in config['cidr'].split(','))
        names = tuple(
            'ipv{}'.format(get_network(cidr).version) for cidr in cidrs
        )

        # Collect the full delete list before touching anything.
        stale_names = []
        for existing in existing_pools:
            existing_name = existing['metadata']['name']
            if existing_name in names and existing['spec']['cidr'] in cidrs:
                continue
            stale_names.append(existing_name)

        for stale_name in stale_names:
            log('Deleting pool: %s' % stale_name)
            calicoctl('delete', 'pool', stale_name, '--skip-not-exists')

        for cidr, name in zip(cidrs, names):
            calicoctl_apply({
                'apiVersion': 'projectcalico.org/v3',
                'kind': 'IPPool',
                'metadata': {
                    'name': name,
                },
                'spec': {
                    'cidr': cidr,
                    'ipipMode': config['ipip'],
                    'vxlanMode': config['vxlan'],
                    'natOutgoing': config['nat-outgoing'],
                }
            })
    except CalledProcessError:
        log(traceback.format_exc())
        encapsulations_conflict = (
            config['ipip'] != 'Never' and config['vxlan'] != 'Never'
        )
        if encapsulations_conflict:
            status.blocked('ipip and vxlan configs are in conflict')
        else:
            status.waiting('Waiting to retry calico pool configuration')
        return

    set_state('calico.pool.configured')
def disable_vxlan_tx_checksumming():
    '''Workaround for https://github.com/projectcalico/calico/issues/3145

    Runs ethtool to switch off tx-checksum-ip-generic on vxlan.calico, but
    only when 'disable-vxlan-tx-checksumming' is set and vxlan is not
    'Never'. A failing ethtool call is logged and reported as waiting.
    '''
    config = charm_config()
    if not config['disable-vxlan-tx-checksumming'] \
            or config['vxlan'] == 'Never':
        return

    ethtool_cmd = [
        'ethtool', '-K', 'vxlan.calico', 'tx-checksum-ip-generic', 'off'
    ]
    try:
        check_call(ethtool_cmd)
    except CalledProcessError:
        retry_msg = 'Waiting to retry disabling VXLAN TX checksumming'
        log(retry_msg)
        status.waiting(retry_msg)
def recycle_daemon():
    """
    Rewrite the docker drop-in files, reload the system daemons and restart
    the docker service. If the runtime does not respond to the probe
    afterwards, the unit status is set to waiting.

    :return: None
    """
    hookenv.log("Restarting docker service.")

    write_drop_ins()
    reload_system_daemons()
    host.service_restart("docker")

    runtime_up = _probe_runtime_availability()
    if not runtime_up:
        status.waiting("Container runtime not available.")
def configure_calico_pool():
    ''' Render the pool template and apply it with calicoctl.

    The template is fed CALICO_CIDR plus the 'ipip' and 'nat-outgoing'
    config values ('nat-outgoing' is rendered as the string 'true' or
    'false'). If calicoctl fails the unit waits and the
    'calico.pool.configured' state is not set.
    '''
    status.maintenance('Configuring Calico IP pool')

    pool_file = '/tmp/calico-pool.yaml'
    config = hookenv.config()
    render('pool.yaml', pool_file, {
        'cidr': CALICO_CIDR,
        'ipip': config['ipip'],
        'nat_outgoing': 'true' if config['nat-outgoing'] else 'false',
    })

    try:
        calicoctl('apply', '-f', pool_file)
    except CalledProcessError:
        status.waiting('Waiting to retry calico pool configuration')
    else:
        set_state('calico.pool.configured')
def update_status():
    """Summarise vaultlocker storage readiness in the unit status.

    Blocks without the vault relation, waits until vaultlocker is
    configured, and otherwise goes active with a message listing which of
    the known storages have their '.ready' flag set and which do not.
    """
    if not is_flag_set('vault.connected'):
        status.blocked('missing relation to vault')
        return
    if not is_flag_set('layer.vaultlocker.configured'):
        status.waiting('waiting for vaultlocker config')
        return

    storages = ('secrets',
                'secrets/0',
                'multi-secrets',
                'multi-secrets/0',
                'multi-secrets/1',
                'multi-secrets/2')
    # Query each flag exactly once, in declaration order.
    is_ready = {
        name: is_flag_set('layer.vaultlocker.{}.ready'.format(name))
        for name in storages
    }
    ready = [name for name in storages if is_ready[name]]
    missing = [name for name in storages if not is_ready[name]]

    status.active('ready: {}; missing: {}'.format(','.join(ready),
                                                  ','.join(missing)))
def signal_workloads_start():
    """
    Tell higher layers that the container runtime can run workloads.

    The only check made is the runtime availability probe: when it passes
    the unit goes active and the 'docker.available' state is set, otherwise
    the unit is put into waiting.

    :return: None
    """
    # A response from the daemon is taken as sufficient proof of readiness.
    if _probe_runtime_availability():
        status.active("Container runtime available.")
        set_state("docker.available")
    else:
        status.waiting("Container runtime not available.")
def configure_kafka_connect_base():
    """Build the Kafka Connect k8s resources and request their creation.

    The worker config is pointed at every broker reported by the kafka
    relation. The resource context derived from it is compared with the
    previous one via data_changed(); only when it differs are the resources
    re-rendered to /etc/kafka-connect/resources.yaml and sent to the
    kubernetes endpoint. Finally the unit is set to waiting and the
    'kafka-connect-base.configured' flag is set.
    """
    kafka = endpoint_from_flag('kafka.ready')
    kubernetes = endpoint_from_flag('endpoint.kubernetes.available')

    # format() rather than '+' so a non-string port does not raise.
    kafka_brokers = [
        '{}:{}'.format(kafka_unit['host'], kafka_unit['port'])
        for kafka_unit in kafka.kafkas()
    ]

    worker_config = generate_worker_config()
    worker_config['bootstrap.servers'] = ','.join(kafka_brokers)
    port = worker_config.get('rest.port', 8083)

    uuid = kubernetes.get_uuid()

    resource_context = {
        'configmap_name': 'cfgmap-{}'.format(uuid),
        'label': uuid,
        'properties': worker_config,
        'service_name': 'svc-{}'.format(uuid),
        'port': port,
        'deployment_name': 'depl-{}'.format(uuid),
        'replicas': conf.get('workers', 1),
        'container_name': uuid,
        'image': unitdata.kv().get('docker-image'),
        'containerport': port,
    }

    if data_changed('resource-context', resource_context):
        # Trigger a rolling update by setting a new annotation in the
        # deployment. It is added after the data_changed() comparison so the
        # timestamp itself never counts as a change.
        resource_context['configmap_annotation'] = hashlib.sha1(
            datetime.datetime.now().isoformat().encode('utf-8')
        ).hexdigest()
        resources_path = '/etc/kafka-connect/resources.yaml'
        templating.render(source="resources.j2",
                          target=resources_path,
                          context=resource_context)

        # safe_load_all: a Loader-less yaml.load_all() can construct
        # arbitrary Python objects and is rejected by PyYAML >= 6.
        # The generator is lazy, so consume it while the file is open.
        with open(resources_path, 'r') as f:
            resources = list(yaml.safe_load_all(f))
        kubernetes.send_create_request(resources)

    status.waiting('Waiting for k8s deployment (will happen in next hook)')
    set_flag('kafka-connect-base.configured')
def configure_calico_pool():
    ''' Make the 'default' Calico IP pool match the charm config.

    Does nothing except set 'calico.pool.configured' when 'manage-pools' is
    disabled. Otherwise every pool that is not named 'default', or whose
    CIDR differs from the configured one, is deleted and the default pool
    is (re)applied. A calicoctl failure is logged and leaves the unit
    waiting with the state unset.
    '''
    config = charm_config()
    if not config['manage-pools']:
        log('Skipping pool configuration')
        set_state('calico.pool.configured')
        return

    status.maintenance('Configuring Calico IP pool')

    try:
        existing_pools = calicoctl_get('pool')['items']

        # Collect the full delete list before touching anything.
        stale_names = []
        for existing in existing_pools:
            existing_name = existing['metadata']['name']
            if existing_name == 'default' \
                    and existing['spec']['cidr'] == config['cidr']:
                continue
            stale_names.append(existing_name)

        for stale_name in stale_names:
            log('Deleting pool: %s' % stale_name)
            calicoctl('delete', 'pool', stale_name, '--skip-not-exists')

        calicoctl_apply({
            'apiVersion': 'projectcalico.org/v3',
            'kind': 'IPPool',
            'metadata': {
                'name': 'default'
            },
            'spec': {
                'cidr': config['cidr'],
                'ipipMode': config['ipip'],
                'natOutgoing': config['nat-outgoing']
            }
        })
    except CalledProcessError:
        log(traceback.format_exc())
        status.waiting('Waiting to retry calico pool configuration')
        return

    set_state('calico.pool.configured')
def ready():
    """Set the unit active once every Calico setup step has completed.

    A series upgrade blocks the unit. If any precondition state is missing
    the status is left untouched. The leader additionally waits for the
    policy controller, and every unit waits for the calico-node service.
    """
    if is_state('upgrade.series.in-progress'):
        status.blocked('Series upgrade in progress')
        return

    required_states = (
        'calico.service.installed',
        'calico.pool.configured',
        'calico.cni.configured',
        'calico.bgp.globals.configured',
        'calico.node.configured',
        'calico.bgp.peers.configured',
    )
    # all() stops at the first missing state, like an early-return loop.
    if not all(is_state(required) for required in required_states):
        return

    if is_leader() and not is_state('calico.npc.deployed'):
        status.waiting('Waiting to retry deploying policy controller')
    elif not service_running('calico-node'):
        status.waiting('Waiting for service: calico-node')
    else:
        status.active('Calico is active')
def deploy_network_policy_controller():
    ''' Render the policy controller manifest and apply it with kubectl.

    Sets 'calico.npc.deployed' when kubectl succeeds; otherwise the unit is
    put into waiting and the error is logged.
    '''
    status.maintenance('Deploying network policy controller.')

    etcd = endpoint_from_flag('etcd.available')
    manifest_path = '/tmp/policy-controller.yaml'
    template_vars = {
        'connection_string': etcd.get_connection_string(),
        'etcd_key_path': ETCD_KEY_PATH,
        'etcd_cert_path': ETCD_CERT_PATH,
        'etcd_ca_path': ETCD_CA_PATH,
        'calico_policy_image': charm_config('calico-policy-image'),
        'etcd_cert_last_modified': os.path.getmtime(ETCD_CERT_PATH),
    }
    render('policy-controller.yaml', manifest_path, template_vars)

    try:
        kubectl('apply', '-f', manifest_path)
    except CalledProcessError as err:
        status.waiting('Waiting for kubernetes')
        log(str(err))
    else:
        set_state('calico.npc.deployed')
def configure_node():
    """Write this unit's BGP settings into its Calico node resource.

    The node named after the local hostname is fetched, its BGP AS number
    and route reflector cluster ID are overwritten, and the result is
    applied. A calicoctl failure is logged and leaves the unit waiting
    without setting 'calico.node.configured'.
    """
    status.maintenance('Configuring Calico node')

    hostname = gethostname()
    unit_as_number = get_unit_as_number()
    cluster_id = get_route_reflector_cluster_id()

    try:
        node = calicoctl_get('node', hostname)
        bgp_spec = node['spec']['bgp']
        bgp_spec['asNumber'] = unit_as_number
        bgp_spec['routeReflectorClusterID'] = cluster_id
        calicoctl_apply(node)
    except CalledProcessError:
        log(traceback.format_exc())
        status.waiting('Waiting to retry Calico node configuration')
        return

    set_state('calico.node.configured')
def create_calico_node_token():
    ''' Create the system:calico-node user token and publish it as leader data.

    A generated token is stored as a secret, read back, and written to the
    'calico-node-token' leader setting. If either step fails, the failure
    is logged and the unit waits for a retry.
    '''
    def retry_later(reason):
        # Both failure paths share the same waiting status.
        log(reason)
        status.waiting('Waiting to retry creating calico-node token')

    status.maintenance('Creating system:calico-node user token')

    user = '******'
    created = kubernetes_common.create_secret(
        token=kubernetes_common.token_generator(),
        username=user,
        user=user,
    )
    if not created:
        retry_later('Failed to create system:calico-node user token, will retry')
        return

    # create_secret may have added the <user>:: prefix. Get the new token.
    stored_token = kubernetes_common.get_secret_password(user)
    if not stored_token:
        retry_later('Failed to get system:calico-node user token, will retry')
        return

    leader_set({'calico-node-token': stored_token})
def configure_calico_pool(etcd):
    ''' Remove every non-default Calico pool and apply the default one.

    The pool list is read from calicoctl as YAML. Every pool not named
    'default' is deleted, then the pool template is rendered with the
    configured CIDR and applied. Any calicoctl failure puts the unit into
    waiting and returns without setting 'calico.pool.configured'.

    The etcd argument is not used by this function.
    '''
    retry_status = 'Waiting to retry calico pool configuration'
    status.maintenance('Configuring Calico IP pool')

    try:
        listing = calicoctl('get', 'pool', '-o', 'yaml').decode('utf-8')
    except CalledProcessError:
        log('Failed to get pools')
        status.waiting(retry_status)
        return

    # Collect every pool name before deleting anything.
    pool_names = [
        item['metadata']['name']
        for item in yaml.safe_load(listing)['items']
    ]
    for pool_name in pool_names:
        if pool_name == 'default':
            continue
        log('Deleting pool: %s' % pool_name)
        try:
            calicoctl('delete', 'pool', pool_name, '--skip-not-exists')
        except CalledProcessError:
            log('Failed to delete pool: %s' % pool_name)
            status.waiting(retry_status)
            return

    pool_file = '/tmp/calico-pool.yaml'
    config = hookenv.config()
    render('pool.yaml', pool_file, {'cidr': config['cidr']})
    try:
        calicoctl('apply', '-f', pool_file)
    except CalledProcessError:
        status.waiting(retry_status)
        return

    set_state('calico.pool.configured')
def register_node_with_leader(cluster):
    """
    Register this unit with the etcd leader.

    Any member that already advertises this unit's peer URL is unregistered
    first, so registration always starts from scratch. When an etcdctl
    command fails the unit is put into waiting and nothing else happens.
    On success the config is rendered, the etcd daemon is restarted, the
    client port is opened and the 'etcd.registered' state is set.
    """
    etcdctl = EtcdCtl()
    bag = EtcdDatabag()
    leader_address = leader_get("leader_address")
    bag.leader_address = leader_address

    try:
        our_peer_url = "https://%s:%s" % (bag.cluster_address,
                                          bag.management_port)
        existing_members = etcdctl.member_list(leader_address)
        for member in existing_members.values():
            if member["peer_urls"] != our_peer_url:
                continue
            log("Found member that matches our peer URL. Unregistering...")
            etcdctl.unregister(member["unit_id"], leader_address)

        # Now register.
        registration = etcdctl.register(bag.__dict__)
        bag.set_cluster(registration["cluster"])
    except EtcdCtl.CommandFailed:
        log("etcdctl.register failed, will retry")
        status.waiting("Waiting to retry etcd registration")
        return

    render_config(bag)
    host.service_restart(bag.etcd_daemon)
    open_port(bag.port)
    set_state("etcd.registered")
def deploy_network_policy_controller():
    ''' Apply the policy controller manifests and the license key.

    Waits until the server key and certificate exist under /root/cdk. Then
    each manifest template is rendered to /tmp and applied with kubectl, in
    a fixed order; the pull secret is included only when registry
    credentials are configured, and the elasticsearch-operator manifests
    only when that option is enabled. Finally the base64 'license-key'
    config is decoded and applied with calicoctl. The first failing apply
    puts the unit into waiting and aborts; on success the apiserver IPs
    used are recorded and 'calico.npc.deployed' is set.
    '''
    status.maintenance('Applying registry credentials secret')

    # FIXME: We're just stealing a server key and cert from a random
    # worker. What should really go here?
    key_path = '/root/cdk/server.key'
    cert_path = '/root/cdk/server.crt'
    if not (os.path.exists(key_path) and os.path.exists(cert_path)):
        wait_msg = 'Waiting for cert generation'
        log(wait_msg)
        status.waiting(wait_msg)
        return

    etcd = endpoint_from_flag('etcd.available')
    encoded_creds = hookenv.config('registry-credentials')
    registry = hookenv.config('registry')
    etcd_cert_hash = get_etcd_cert_hash()
    apiserver_ips = get_apiserver_ips()

    # (template name, render context) pairs, applied in list order.
    manifests = []
    if encoded_creds:
        manifests.append(('cnx-pull-secret.yaml', {
            'credentials': encoded_creds
        }))
    manifests.extend([
        ('calico-config.yaml', {
            'etcd_endpoints': etcd.get_connection_string()
        }),
        ('calico-etcd-secrets.yaml', {
            'etcd_key': read_file_to_base64(ETCD_KEY_PATH),
            'etcd_cert': read_file_to_base64(ETCD_CERT_PATH),
            'etcd_ca': read_file_to_base64(ETCD_CA_PATH)
        }),
        ('calico-kube-controllers.yaml', {
            'registry': registry,
            'etcd_cert_hash': etcd_cert_hash
        }),
        ('cnx-manager-tls-secret.yaml', {
            'key': read_file_to_base64(key_path),
            'cert': read_file_to_base64(cert_path)
        }),
        ('cnx-etcd.yaml', {
            'registry': registry,
            'etcd_cert_hash': etcd_cert_hash
        }),
        ('cnx-policy.yaml', {}),
    ])

    # elasticsearch-operator requires vm.max_map_count>=262144 on the host
    if hookenv.config('enable-elasticsearch-operator'):
        check_call(['sysctl', 'vm.max_map_count=262144'])
        manifests.extend([
            ('elasticsearch-operator.yaml', {
                'registry': registry
            }),
            ('monitor-calico.yaml', {
                'apiserver_ips': json.dumps(apiserver_ips),
                'registry': registry
            }),
        ])

    for template_name, template_vars in manifests:
        status.maintenance('Applying ' + template_name)
        rendered_path = '/tmp/' + template_name
        render(template_name, rendered_path, template_vars)
        try:
            kubectl('apply', '-f', rendered_path)
        except CalledProcessError:
            retry_msg = 'Waiting to retry applying ' + template_name
            log(retry_msg)
            status.waiting(retry_msg)
            return

    license_key = b64decode(hookenv.config('license-key')).decode('utf-8')
    license_key_path = '/tmp/license-key.yaml'
    with open(license_key_path, 'w') as license_file:
        license_file.write(license_key)
    try:
        calicoctl('apply', '-f', license_key_path)
    except CalledProcessError:
        retry_msg = 'Waiting to retry applying license-key'
        log(retry_msg)
        status.waiting(retry_msg)
        return

    db.set('tigera.apiserver_ips_used', apiserver_ips)
    set_state('calico.npc.deployed')
def configure_bgp_peers():
    """Create this unit's BGPPeer resources and prune the obsolete ones.

    Peers are gathered from three YAML config options: 'global-bgp-peers',
    'subnet-bgp-peers' (only the subnets selected by filter_local_subnets)
    and 'unit-bgp-peers' (only the entry for this unit's id). Each peer is
    applied as a BGPPeer bound to the local hostname and named
    '<unit>-<address>-<as-number>'. Existing peers carrying this unit's
    name prefix that are no longer wanted are deleted.

    An option that parses to nothing (an empty string, for example) is
    treated as "no peers" instead of raising TypeError.

    A calicoctl failure is logged and leaves the unit waiting without
    setting 'calico.bgp.peers.configured'.
    """
    status.maintenance('Configuring BGP peers')

    peers = []

    # Global BGP peers
    config = charm_config()
    # yaml.safe_load() returns None for an empty document.
    peers += yaml.safe_load(config['global-bgp-peers']) or []

    # Subnet-scoped BGP peers
    subnet_bgp_peers = yaml.safe_load(config['subnet-bgp-peers']) or {}
    for subnet in filter_local_subnets(subnet_bgp_peers):
        peers += subnet_bgp_peers[str(subnet)] or []

    # Unit-scoped BGP peers
    unit_id = get_unit_id()
    unit_bgp_peers = yaml.safe_load(config['unit-bgp-peers']) or {}
    if unit_id in unit_bgp_peers:
        peers += unit_bgp_peers[unit_id] or []

    # Give names to peers
    safe_unit_name = local_unit().replace('/', '-')
    named_peers = {
        # name must consist of lower case alphanumeric characters, '-' or '.'
        '%s-%s-%s' % (
            safe_unit_name,
            peer['address'].replace(':', '-'),
            peer['as-number']
        ): peer
        for peer in peers
    }

    try:
        node_name = gethostname()
        for peer_name, peer in named_peers.items():
            peer_def = {
                'apiVersion': 'projectcalico.org/v3',
                'kind': 'BGPPeer',
                'metadata': {
                    'name': peer_name,
                },
                'spec': {
                    'node': node_name,
                    'peerIP': peer['address'],
                    'asNumber': peer['as-number']
                }
            }
            calicoctl_apply(peer_def)

        # Delete unrecognized peers. Only names carrying this unit's prefix
        # are considered, so other peers are never removed here.
        existing_peers = calicoctl_get('bgppeers')['items']
        existing_peers = [peer['metadata']['name'] for peer in existing_peers]
        peers_to_delete = [
            peer for peer in existing_peers
            if peer.startswith(safe_unit_name + '-')
            and peer not in named_peers
        ]

        for peer in peers_to_delete:
            calicoctl('delete', 'bgppeer', peer)
    except CalledProcessError:
        log(traceback.format_exc())
        status.waiting('Waiting to retry BGP peer configuration')
        return

    set_state('calico.bgp.peers.configured')
def waiting_messaging():
    """Put the unit into waiting with the elasticsearch/logstash/kafka message."""
    message = 'Waiting for: elasticsearch, logstash or kafka.'
    status.waiting(message)
def waiting_for_db():
    """Report that the database is still missing and set the waiting flag.

    Sets the unit status to waiting and raises the 'fresh-rss.db.waiting'
    flag.
    """
    message = 'Waiting for database relation or configuration'
    status.waiting(message)
    set_flag('fresh-rss.db.waiting')
def ready():
    """Set the unit active when calico-node is running, waiting otherwise."""
    if service_running('calico-node'):
        status.active('Calico is active')
        return
    status.waiting('Waiting for service: calico-node')
def ready():
    ''' Show the Flannel subnet in an active status, or wait for Flannel.

    The unit goes to waiting when the subnet lookup raises
    FlannelSubnetNotFound.
    '''
    try:
        subnet = get_flannel_subnet()
    except FlannelSubnetNotFound:
        status.waiting('Waiting for Flannel')
    else:
        status.active('Flannel subnet ' + subnet)