def _upgrade_k8s_master(upd, with_testing):
    # ServiceAccount signing key
    upd.print_log("Generating key for service account")
    helpers.local("""
        mkdir -m o-rwx -p `dirname {key}`
        openssl genrsa -out {key} 2048
        chmod -R 0440 {key}
        chown -R kube:kube `dirname {key}`
    """.format(key=SA_KEY))

    upd.print_log("Updating apiserver config")
    helpers.update_local_config_file(
        "/etc/kubernetes/apiserver",
        {"KUBE_API_ARGS": {"--service_account_key_file=": SA_KEY}}
    )
    helpers.update_local_config_file(
        "/etc/kubernetes/apiserver",
        {"KUBE_ADMISSION_CONTROL": {
            "--admission_control=": "NamespaceLifecycle,NamespaceExists"}}
    )

    upd.print_log("Updating controller-manager config")
    helpers.update_local_config_file(
        "/etc/kubernetes/controller-manager",
        {"KUBE_CONTROLLER_MANAGER_ARGS": {
            "--service_account_private_key_file=": SA_KEY}}
    )

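# `helpers.update_local_config_file` comes from the project's helpers module
# and is not shown here. A hypothetical sketch of the contract the calls
# above rely on (assumption: for a sysconfig-style KEY="..." line, each flag
# prefix is replaced with "<flag><value>", and a value of None removes the
# flag entirely); the real helper may differ:
import re


def update_local_config_file_sketch(path, config):
    with open(path) as f:
        text = f.read()
    for key, flags in config.items():
        match = re.search(r'^%s="(.*)"$' % key, text, re.M)
        if not match:
            continue
        args = match.group(1)
        for flag, value in flags.items():
            # Drop any existing occurrence of the flag, then re-add it
            # with the new value unless the value is None.
            args = re.sub(r'\s*%s\S*' % re.escape(flag), '', args)
            if value is not None:
                args += ' {0}{1}'.format(flag, value)
        text = text[:match.start(1)] + args.strip() + text[match.end(1):]
    with open(path, 'w') as f:
        f.write(text)
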
def upgrade(upd, with_testing, *args, **kwargs):
    upd.print_log('Generating new auth config file for nodes...')
    with open('/etc/kubernetes/kubelet_token.dat') as f:
        data = json.load(f)
    token = data['BearerToken']
    with open('/etc/kubernetes/configfile_for_nodes', 'w') as f:
        f.write(configfile.format(token, MASTER_IP))

    upd.print_log('Changing config files...')
    upd.print_log('1) controller-manager',
                  helpers.local('mv /etc/kubernetes/controller-manager.rpmnew '
                                '/etc/kubernetes/controller-manager'))
    upd.print_log('2) kube-apiserver')
    with open('/etc/kubernetes/apiserver') as f:
        data = f.read().replace('--portal_net', '--service-cluster-ip-range')
    data = data.replace(
        'AutoProvision,LimitRanger',
        'Lifecycle,NamespaceExists,LimitRanger,SecurityContextDeny,'
        'ServiceAccount')
    data = data.replace('--public_address_override', '--bind-address')
    with open('/etc/kubernetes/apiserver', 'w') as f:
        f.write(data)
    upd.print_log('Done.')

    upd.print_log('Trying to restart master kubernetes...')
    service, code = helpers.restart_master_kubernetes(with_enable=True)
    if code != 0:
        raise helpers.UpgradeError('Kubernetes not restarted. '
                                   'Service {0} code {1}'.format(service, code))
    else:
        upd.print_log('Deleting old token file',
                      helpers.local('rm -f /etc/kubernetes/kubelet_token.dat'))
    helpers.local('rm -f /etc/kubernetes/apiserver.rpmnew')

def _update_00191_upgrade(upd, calico_network):
    etcd1 = helpers.local('uname -n')
    _master_etcd_cert(etcd1)
    _master_etcd_conf(etcd1)
    helpers.restart_service('etcd')

    _master_docker(upd)
    _master_firewalld()
    _master_k8s_node()

    if helpers.local('docker ps --format "{{.Names}}" | '
                     'grep "^calico-node$"') != 'calico-node':
        _master_calico(upd, calico_network)

    _master_k8s_extensions()
    helpers.restart_master_kubernetes()
    helpers.local('echo "{0}" | kubectl create -f -'.format(_K8S_EXTENSIONS))
    # We need to restart here again, because kubernetes sometimes doesn't
    # accept extensions on the fly.
    helpers.restart_master_kubernetes()
    _master_network_policy(upd, calico_network)

    _master_dns_policy()
    _master_pods_policy()

    _master_service_update()

def _downgrade_k8s_master(upd, with_testing):
    upd.print_log("Removing service account key.")
    helpers.local('rm -rf `dirname %s`' % SA_KEY)

    upd.print_log("Updating apiserver config")
    helpers.update_local_config_file(
        '/etc/kubernetes/apiserver',
        {"KUBE_API_ARGS": {"--service_account_key_file=": None}}
    )
    helpers.update_local_config_file(
        "/etc/kubernetes/apiserver",
        {
            "KUBE_ADMISSION_CONTROL": {
                "--admission_control=": "NamespaceLifecycle,"
                                        "NamespaceExists,"
                                        "SecurityContextDeny"
            }
        }
    )

    upd.print_log("Updating controller-manager config")
    helpers.update_local_config_file(
        '/etc/kubernetes/controller-manager',
        {"KUBE_CONTROLLER_MANAGER_ARGS": {
            "--service_account_private_key_file=": None}}
    )

def _master_calico(upd, calico_network):
    rv = helpers.local(
        'ETCD_AUTHORITY=127.0.0.1:4001 /opt/bin/calicoctl pool add '
        '{} --ipip --nat-outgoing'.format(calico_network))
    if rv.failed:
        raise helpers.UpgradeError("Can't add calicoctl pool: {}".format(rv))

    for i in range(3):
        helpers.local('sync')
        rv = helpers.local('docker pull kuberdock/calico-node:0.22.0-kd2')
        if not rv.failed:
            break
        upd.print_log("Pull calico-node failed. Doing retry {}".format(i))
        sleep(10)
    if rv.failed:
        raise helpers.UpgradeError(
            "Can't pull calico-node image after 3 retries: {}".format(rv))

    helpers.local("sync && sleep 5")
    rv = helpers.local(
        'ETCD_AUTHORITY=127.0.0.1:4001 /opt/bin/calicoctl node '
        '--ip="{0}" --node-image=kuberdock/calico-node:0.22.0-kd2'.format(
            MASTER_IP))
    if rv.failed:
        raise helpers.UpgradeError("Can't start calico node: {}".format(rv))
    helpers.local("sync && sleep 5")

def _downgrade_etcd(upd):
    upd.print_log('Downgrading etcd...')
    cp = NonTransformConfigParser()
    with open(ETCD_SERVICE_FILE) as f:
        cp.readfp(f)
    cp.set('Service', 'Type', 'simple')
    with open(ETCD_SERVICE_FILE, "w") as f:
        cp.write(f)

    helpers.local('systemctl daemon-reload', capture=False)
    helpers.local('systemctl restart etcd', capture=False)

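# `NonTransformConfigParser` is defined elsewhere in the project. A minimal
# sketch of the likely intent (assumption: it only disables ConfigParser's
# default lower-casing of option names, which would otherwise corrupt
# case-sensitive systemd unit keys such as "Type"):
import ConfigParser


class NonTransformConfigParser(ConfigParser.ConfigParser):
    def optionxform(self, option):
        # Preserve option names exactly as written.
        return option
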
def _master_pods_policy():
    pods = Pod.query.filter(Pod.status != 'deleted')
    for pod in pods:
        namespace = pod.get_dbconfig()['namespace']
        owner_repr = str(pod.owner.id)
        helpers.local(
            'kubectl annotate ns {0} '
            '"net.alpha.kubernetes.io/network-isolation=yes" '
            '--overwrite=true'.format(namespace))
        helpers.local('kubectl label ns {0} "kuberdock-user-uid={1}" '
                      '--overwrite=true'.format(namespace, owner_repr))
        # Create an allow-same-user network policy in the pod's namespace.
        rv = podcollection._get_network_policy_api().post(
            ['networkpolicys'],
            json.dumps(podcollection.allow_same_user_policy(owner_repr)),
            rest=True, ns=namespace)

def _master_docker(upd):
    helpers.local("mkdir -p /etc/systemd/system/docker.service.d/")
    helpers.local(
        "cat << EOF > /etc/systemd/system/docker.service.d/timeouts.conf\n"
        "{}\nEOF".format(DOCKER_TIMEOUTS_DROPIN))
    helpers.local('systemctl daemon-reload')
    upd.print_log(helpers.local('systemctl reenable docker'))
    upd.print_log(helpers.restart_service('docker'))
    # Just to be sure and see output in logs:
    upd.print_log(helpers.local('docker info'))

def _update_nonfloating_config(upd):
    upd.print_log('Updating kube-scheduler config...')
    helpers.local(
        'sed -i "s/--enable-non-floating-ip/--enable-fixed-ip-pools/" '
        '/etc/kubernetes/scheduler')
    helpers.local('systemctl restart kube-scheduler')

    upd.print_log('Updating kuberdock main config...')
    helpers.local(
        'sed -i "s/NONFLOATING_PUBLIC_IPS/FIXED_IP_POOLS/" {}'.format(
            KUBERDOCK_MAIN_CONFIG))

def checkout_calico_network():
    cp = ConfigParser.ConfigParser()
    cp.read(KUBERDOCK_MAIN_CONFIG)
    try:
        v = cp.get('main', 'CALICO_NETWORK')
    except ConfigParser.Error:
        v = None
    if v:
        return v

    nets = helpers.local(
        "ip -o -4 addr | grep -vP '\slo\s' | awk '{print $4}'")
    calico_network = get_calico_network(nets)
    if not calico_network:
        raise helpers.UpgradeError("Can't find suitable network for Calico")

    cp.set('main', 'CALICO_NETWORK', calico_network)
    with open(KUBERDOCK_MAIN_CONFIG, 'wb') as configfile:
        cp.write(configfile)
    return calico_network

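# `get_calico_network` is defined elsewhere in the upgrade utilities. A
# hypothetical sketch of the idea (assumption: pick the first 10.x.0.0/16
# block that does not overlap any network already configured on the host;
# the real selection logic may differ). Uses the `ipaddress` backport on
# Python 2:
import ipaddress


def get_calico_network_sketch(host_nets):
    # host_nets: output of "ip -o -4 addr ... awk '{print $4}'",
    # one CIDR per line, e.g. "192.168.1.10/24".
    taken = [ipaddress.ip_network(unicode(n), strict=False)
             for n in host_nets.splitlines() if n.strip()]
    for second_octet in range(256):
        candidate = ipaddress.ip_network(u'10.{}.0.0/16'.format(second_octet))
        if not any(candidate.overlaps(net) for net in taken):
            return str(candidate)
    return None
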
def upgrade(upd, with_testing, *args, **kwargs):
    # 00090_update.py
    upd.print_log('Update system settings scheme...')
    helpers.upgrade_db()
    redis = ConnectionPool.get_connection()

    billing_apps_link = SystemSettings.get_by_name('billing_apps_link')
    persitent_disk_max_size = SystemSettings.get_by_name(
        'persitent_disk_max_size')

    # backup for downgrade
    if not redis.get('old_billing_apps_link'):
        redis.set('old_billing_apps_link', billing_apps_link or '',
                  ex=int(timedelta(days=7).total_seconds()))
    if not redis.get('old_persitent_disk_max_size'):
        redis.set('old_persitent_disk_max_size', persitent_disk_max_size,
                  ex=int(timedelta(days=7).total_seconds()))

    billing_url = (urlparse(billing_apps_link)
                   ._replace(path='', query='', params='').geturl()
                   if billing_apps_link else None)
    SystemSettings.query.delete()
    add_system_settings()
    SystemSettings.set_by_name(
        'persitent_disk_max_size', persitent_disk_max_size, commit=False)
    SystemSettings.set_by_name('billing_url', billing_url, commit=False)
    db.session.commit()

    # 00094_update.py
    upd.print_log('Drop table "node_missed_actions" if exists')
    table = Table('node_missed_actions', db.metadata)
    table.drop(bind=db.engine, checkfirst=True)
    db.session.commit()

    # 00095_update.py
    upd.print_log('Restart k8s2etcd service')
    upd.print_log(helpers.local('systemctl restart kuberdock-k8s2etcd'))

    # 00098_update.py
    copyfile('/var/opt/kuberdock/conf/sudoers-nginx.conf',
             '/etc/sudoers.d/nginx')
    local('chown nginx:nginx /etc/nginx/conf.d/shared-kubernetes.conf')
    local('chown nginx:nginx /etc/nginx/conf.d/shared-etcd.conf')

    helpers.close_all_sessions()

def upgrade(cls, upd):
    upd.print_log('Update influxdb...')

    # remove old version with all settings and all data
    helpers.stop_service('influxdb')
    helpers.local('rm -rf /opt/influxdb')
    helpers.local('rm /etc/systemd/system/influxdb.service')

    if os.path.isdir('/var/lib/influxdb/'):
        helpers.local('chown -R influxdb /var/lib/influxdb/')
        helpers.local('chgrp -R influxdb /var/lib/influxdb/')
    helpers.local('systemctl daemon-reload')

    # install new version
    helpers.local('systemctl reenable influxdb')
    helpers.local('systemctl restart influxdb')

    # wait for the service to start, with exponential backoff
    t = 1
    success = False
    ping_url = 'http://%s:%s/ping' % (
        settings.INFLUXDB_HOST, settings.INFLUXDB_PORT)
    for _ in xrange(5):
        try:
            requests.get(ping_url)
        except requests.ConnectionError:
            sleep(t)
            t *= 2
        else:
            success = True
            break
    if not success:
        raise helpers.UpgradeError('Influxdb does not answer to ping')

    # initialization
    helpers.local(
        'influx -execute '
        '"create user {u} with password \'{p}\' with all privileges"'
        .format(u=settings.INFLUXDB_USER, p=settings.INFLUXDB_PASSWORD))
    helpers.local('influx -execute "create database {db}"'
                  .format(db=settings.INFLUXDB_DATABASE))

def _master_etcd_conf(etcd1):
    conf = _MASTER_ETCD_CONF.format(MASTER_IP, etcd1)
    helpers.local('echo "{0}" > /etc/etcd/etcd.conf'.format(conf))

def _master_etcd_cert(etcd1):
    helpers.local('rm -f /root/.etcd-ca/{0}.host.crt'.format(etcd1))
    helpers.local('rm -f /root/.etcd-ca/{0}.host.csr'.format(etcd1))
    helpers.local('rm -f /root/.etcd-ca/{0}.host.key'.format(etcd1))
    helpers.local(
        'etcd-ca --depot-path /root/.etcd-ca new-cert --ip "{0},127.0.0.1" '
        '--passphrase "" {1}'.format(MASTER_IP, etcd1))
    helpers.local('etcd-ca --depot-path /root/.etcd-ca sign --passphrase "" '
                  '{0}'.format(etcd1))
    helpers.local('etcd-ca --depot-path /root/.etcd-ca export {0} --insecure '
                  '--passphrase "" | tar -xf -'.format(etcd1))
    helpers.local('mv -f {0}.crt /etc/pki/etcd/'.format(etcd1))
    helpers.local('mv -f {0}.key.insecure /etc/pki/etcd/{0}.key'.format(etcd1))

def downgrade(upd, with_testing, exception, *args, **kwargs):
    helpers.local('yum downgrade kubernetes-master --enablerepo=kube')
    helpers.restart_master_kubernetes()

def _master_firewalld():
    helpers.local('systemctl stop firewalld')
    helpers.local('systemctl disable firewalld')
    helpers.install_package('firewalld', action='remove')
    helpers.local('systemctl daemon-reload')

def _master_flannel():
    for cmd in RM_FLANNEL_COMMANDS_MASTER:
        helpers.local(cmd)
    helpers.install_package('flannel', action='remove')
    helpers.local('systemctl daemon-reload')

def upgrade(upd, with_testing, *args, **kwargs):
    upd.print_log('Test_master_upgrade 1', helpers.local('uname -a'))
    helpers.upgradedb()

def upgrade(upd, with_testing, *args, **kwargs):
    if CEPH:
        helpers.local(
            """sed -i '/^KUBE_ALLOW_PRIV/ {s/--allow_privileged=false/--allow_privileged=true/}' /etc/kubernetes/config"""
        )
        helpers.local('systemctl restart kube-apiserver')

def _master_k8s_extensions():
    helpers.local(
        'sed -i "/^KUBE_API_ARGS/ {s|\\"$| --runtime-config='
        'extensions/v1beta1=true,extensions/v1beta1/thirdpartyresources='
        'true\\"|}" /etc/kubernetes/apiserver')

def _master_k8s_node():
    helpers.install_package(CONNTRACK_PACKAGE)
    helpers.local('systemctl reenable kube-proxy')
    helpers.restart_service('kube-proxy')

def downgrade(upd, with_testing, exception, *args, **kwargs):
    upd.print_log('Reloading flannel...')
    helpers.local('systemctl daemon-reload')
    helpers.local('systemctl restart flanneld')

def upgrade(upd, with_testing, *args, **kwargs):
    # 00085_update.py
    upd.print_log('Add default Persistent Disks size in pods config...')
    pods = Pod.query.all()
    for pod in pods:
        upd.print_log('Processing pod {0}'.format(pod.name))
        config = pod.get_dbconfig()
        config['volumes_public'] = with_size(
            config.get('volumes_original', []), pod.owner_id)
        pod.set_dbconfig(config, save=False)
    for pod in pods:
        config = pod.get_dbconfig()
        config.pop('volumes_original', None)
        pod.set_dbconfig(config, save=False)
    db.session.commit()

    # 00086_update.py
    upd.print_log('Update kubes to hard limits')
    internal = Kube.get_by_name('Internal service')
    if internal:
        internal.cpu = 0.02
        internal.save()
    small = Kube.get_by_name('Small')
    if small:
        small.cpu = 0.05
        small.save()
    standard = Kube.get_by_name('Standard')
    if standard:
        standard.cpu = 0.25
        standard.save()
    high = Kube.get_by_name('High memory')
    if high:
        high.cpu = 0.5
        high.save()

    upd.print_log('Setup k8s2etcd middleware service')
    upd.print_log(
        helpers.local(
            "cat > /etc/systemd/system/kuberdock-k8s2etcd.service << 'EOF' {0}"
            .format(SERVICE_FILE))
    )
    helpers.local('systemctl daemon-reload')
    upd.print_log(helpers.local('systemctl reenable kuberdock-k8s2etcd'))
    upd.print_log(helpers.local('systemctl restart kuberdock-k8s2etcd'))

    upd.print_log('Add after etcd.service to kube-apiserver service file')
    upd.print_log(
        helpers.local(
            "cat > /etc/systemd/system/kube-apiserver.service << 'EOF' {0}"
            .format(K8S_API_SERVICE_FILE))
    )

    upd.print_log('Turn off watch-cache in kube_apiserver')
    with open(KUBE_API_SERVER_PATH) as f:
        lines = f.readlines()
    with open(KUBE_API_SERVER_PATH, 'w+') as f:
        for line in lines:
            if (KUBE_API_SERVER_ARG in line
                    and KUBE_API_WATCHCACHE_DISABLE not in line):
                s = line.split('"')
                s[1] += KUBE_API_WATCHCACHE_DISABLE
                line = '"'.join(s)
            f.write(line)
    helpers.restart_master_kubernetes(with_enable=True)

    # 00087_update.py
    upd.print_log('Upgrade namespaces for PD...')
    config = ConfigParser.RawConfigParser()
    config.read(KUBERDOCK_SETTINGS_FILE)
    ns = MASTER_IP
    if not config.has_option('main', 'PD_NAMESPACE'):
        if CEPH:
            # Store the default CEPH pool as the namespace. It was already
            # used by the KD cluster, so we will not change it.
            ns = OLD_DEFAULT_CEPH_POOL
        config.set('main', 'PD_NAMESPACE', ns)
        with open(KUBERDOCK_SETTINGS_FILE, 'wb') as fout:
            config.write(fout)

    if CEPH:
        # Set 'rbd' for all existing ceph drives, because it was the
        # default pool
        PersistentDisk.query.filter(
            ~PersistentDisk.drive_name.contains(PD_NS_SEPARATOR)
        ).update(
            {PersistentDisk.drive_name:
                OLD_DEFAULT_CEPH_POOL + PD_NS_SEPARATOR +
                PersistentDisk.drive_name},
            synchronize_session=False
        )
        db.session.commit()
    try:
        pstorage.check_namespace_exists(namespace=ns)
    except pstorage.NoNodesError:
        # skip CEPH pool checking if there are no nodes with CEPH
        pass

    # Restart kuberdock to prevent loss of PD bind state, because the fix
    # for this is in the new version.
    helpers.restart_service('emperor.uwsgi')

def upgrade(cls, upd):
    upd.print_log('Start heapster service...')
    helpers.local('systemctl reenable heapster')
    helpers.local('systemctl restart heapster')

def downgrade(cls, upd):
    upd.print_log('Stop and disable heapster service...')
    helpers.local('systemctl stop heapster')
    helpers.local('systemctl disable heapster')

def _master_network_policy(upd, calico_network):
    RULE_NEXT_TIER = {
        "id": "next-tier",
        "order": 9999,
        "inbound_rules": [{"action": "next-tier"}],
        "outbound_rules": [{"action": "next-tier"}],
        "selector": "all()"
    }
    helpers.local("etcdctl set /calico/v1/policy/tier/failsafe/metadata "
                  "'{\"order\": 0}'")
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-hosts/metadata "
        "'{\"order\": 5}'")
    helpers.local(
        'etcdctl mkdir /calico/v1/policy/tier/kuberdock-hosts/policy')
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-hosts/policy/next-tier "
        "'{}'".format(json.dumps(RULE_NEXT_TIER)))
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-nodes/metadata "
        "'{\"order\": 10}'")
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-service/metadata "
        "'{\"order\": 20}'")
    helpers.local(
        'etcdctl mkdir /calico/v1/policy/tier/kuberdock-service/policy')
    helpers.local(
        "etcdctl set /calico/v1/policy/tier/kuberdock-service/policy/next-tier "
        "'{}'".format(json.dumps(RULE_NEXT_TIER)))

    KD_HOST_ROLE = 'kdnode'
    helpers.local("sync && sleep 5")
    upd.print_log('Trying to get master tunnel IP...')
    retry_pause = 3
    max_retries = 10
    MASTER_TUNNEL_IP = retry(
        get_calico_ip_tunnel_address, retry_pause, max_retries)
    upd.print_log('Master tunnel IP is: {}'.format(MASTER_TUNNEL_IP))
    if not MASTER_TUNNEL_IP:
        raise helpers.UpgradeError("Failed to get master tunnel IP")

    KD_NODES_NEXT_TIER_FOR_PODS = {
        "id": "kd-nodes-dont-drop-pods-traffic",
        "selector": "has(kuberdock-pod-uid)",
        "order": 50,
        "inbound_rules": [{"action": "next-tier"}],
        "outbound_rules": [{"action": "next-tier"}]
    }
    KD_NODES_POLICY = {
        "id": "kd-nodes-public",
        "selector": 'role=="{}"'.format(KD_HOST_ROLE),
        "order": 100,
        "inbound_rules": [
            {"src_net": "{}/32".format(MASTER_IP), "action": "allow"},
            {"src_net": "{}/32".format(MASTER_TUNNEL_IP), "action": "allow"},
            {"protocol": "tcp", "dst_ports": [22], "action": "allow"},
        ],
        "outbound_rules": [{"action": "allow"}]
    }
    helpers.local(
        "etcdctl set "
        "/calico/v1/policy/tier/kuberdock-nodes/policy/kuberdock-nodes '{}'"
        .format(json.dumps(KD_NODES_POLICY)))
    helpers.local(
        "etcdctl set "
        "/calico/v1/policy/tier/kuberdock-nodes/policy/pods-next-tier '{}'"
        .format(json.dumps(KD_NODES_NEXT_TIER_FOR_PODS)))

    KD_MASTER_ROLE = 'kdmaster'
    master_public_tcp_ports = [22, 80, 443, 6443, 2379, 8123, 8118]
    master_public_udp_ports = [123]
    KD_MASTER_POLICY = {
        "id": "kdmaster-public",
        "selector": 'role=="{}"'.format(KD_MASTER_ROLE),
        "order": 200,
        "inbound_rules": [
            {"protocol": "tcp",
             "dst_ports": master_public_tcp_ports,
             "action": "allow"},
            {"protocol": "udp",
             "dst_ports": master_public_udp_ports,
             "action": "allow"},
            {"action": "next-tier"}
        ],
        "outbound_rules": [{"action": "allow"}]
    }
    helpers.local(
        "etcdctl set "
        "/calico/v1/policy/tier/kuberdock-nodes/policy/kuberdock-master '{}'"
        .format(json.dumps(KD_MASTER_POLICY)))

    KD_NODES_FAILSAFE_POLICY = {
        "id": "failsafe-all",
        "selector": "all()",
        "order": 100,
        "inbound_rules": [
            {"protocol": "icmp", "action": "allow"},
            {"dst_net": calico_network,
             "src_net": "{}/32".format(MASTER_TUNNEL_IP),
             "action": "allow"},
            {"action": "next-tier"}
        ],
        "outbound_rules": [
            {"protocol": "tcp",
             "dst_ports": [2379],
             "dst_net": "{}/32".format(MASTER_IP),
             "action": "allow"},
            {"src_net": "{}/32".format(MASTER_TUNNEL_IP), "action": "allow"},
            {"protocol": "udp", "dst_ports": [67], "action": "allow"},
            {"action": "next-tier"}
        ]
    }
    helpers.local(
        "etcdctl set "
        "/calico/v1/policy/tier/failsafe/policy/failsafe '{}'".format(
            json.dumps(KD_NODES_FAILSAFE_POLICY)))

    MASTER_HOST_ENDPOINT = {
        "expected_ipv4_addrs": [MASTER_IP],
        "labels": {"role": KD_MASTER_ROLE},
        "profile_ids": []
    }
    MASTER_HOSTNAME = socket.gethostname()
    etcd_path = '/calico/v1/host/{0}/endpoint/{0}'.format(MASTER_HOSTNAME)
    helpers.local("etcdctl set {} '{}'".format(
        etcd_path, json.dumps(MASTER_HOST_ENDPOINT)))

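# `retry` is imported from the upgrade utilities. A minimal sketch of the
# semantics relied on above (assumption: call `fn` up to `max_retries` times
# with `pause` seconds between attempts, returning the first truthy result,
# or the last result if all attempts fail):
from time import sleep


def retry_sketch(fn, pause, max_retries, *args, **kwargs):
    result = None
    for _ in range(max_retries):
        result = fn(*args, **kwargs)
        if result:
            break
        sleep(pause)
    return result
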
def downgrade(upd, with_testing, exception, *args, **kwargs):
    upd.print_log('Reloading nginx...')
    helpers.local('nginx -s reload')