def cinder_volume_update_host(orig_env, new_env):
    orig_controller = env_util.get_one_controller(orig_env)
    new_controller = env_util.get_one_controller(new_env)
    current_host = get_current_host(orig_controller)
    new_host = get_new_host(new_controller)
    ssh.call(["cinder-manage", "volume", "update_host",
              "--currenthost", current_host,
              "--newhost", new_host],
             node=new_controller, parse_levels=True)

def cleanup_environment(env_id):
    env = objects.Environment(env_id)
    controller = env_util.get_one_controller(env)
    sftp = ssh.sftp(controller)
    admin_pass = env_util.get_admin_password(env, controller)
    script_filename = 'clean_env.py'
    with ssh.tempdir(controller) as tempdir:
        script_src_filename = os.path.join(
            magic_consts.CWD, "helpers", script_filename)
        script_dst_filename = os.path.join(tempdir, script_filename)
        sftp.put(script_src_filename, script_dst_filename)
        command = [
            'sh', '-c',
            '. /root/openrc; export OS_PASSWORD={0}; python {1}'
            .format(admin_pass, script_dst_filename),
        ]
        with ssh.popen(command, node=controller, stdin=ssh.PIPE) as proc:
            roles = ["controller", "compute"]
            for node in env_util.get_nodes(env, roles):
                data = "{0}\n{1}\n".format(node.data['fqdn'].split('.')[0],
                                           node.data['fqdn'])
                proc.stdin.write(data)

def install_node(orig_id, seed_id, node_ids, isolated=False, networks=None):
    if orig_id == seed_id:
        raise Exception("Original and seed environments have the same ID: %s",
                        orig_id)
    orig_env = environment_obj.Environment(orig_id)
    seed_env = environment_obj.Environment(seed_id)
    check_networks(orig_env, seed_env, networks)
    nodes = [node_obj.Node(node_id) for node_id in node_ids]
    orig_node = env_util.get_one_controller(orig_env)
    seed_env.assign(nodes, orig_node.data['roles'])
    for node in nodes:
        disk_info_fixture = orig_node.get_attribute('disks')
        nic_info_fixture = orig_node.get_attribute('interfaces')
        update_node_settings(node, disk_info_fixture, nic_info_fixture)
    if networks:
        env_util.clone_ips(orig_id, networks)
    env_util.provision_nodes(seed_env, nodes)
    for node in nodes:
        # FIXME: properly call all handlers all over the place
        controller_upgrade.ControllerUpgrade(
            node, seed_env, isolated=isolated).predeploy()
    if len(nodes) > 1:
        isolate(nodes, seed_env)
    env_util.deploy_changes(seed_env, nodes)
    for node in nodes:
        controller_upgrade.ControllerUpgrade(
            node, seed_env, isolated=isolated).postdeploy()

def install_node(orig_id, seed_id, node_ids, isolated=False, networks=None):
    if orig_id == seed_id:
        raise Exception("Original and seed environments have the same ID: %s",
                        orig_id)
    orig_env = environment_obj.Environment(orig_id)
    seed_env = environment_obj.Environment(seed_id)
    check_networks(orig_env, seed_env, networks)
    nodes = [node_obj.Node(node_id) for node_id in node_ids]
    orig_node = env_util.get_one_controller(orig_env)
    seed_env.assign(nodes, orig_node.data["roles"])
    for node in nodes:
        disk_info_fixture = orig_node.get_attribute("disks")
        nic_info_fixture = orig_node.get_attribute("interfaces")
        update_node_settings(node, disk_info_fixture, nic_info_fixture)
    if networks:
        env_util.clone_ips(orig_id, networks)
    LOG.info("Nodes reboot in progress. Please wait...")
    node_util.reboot_nodes(nodes, timeout=180 * 60)
    node_util.wait_for_mcollective_start(nodes)
    env_util.provision_nodes(seed_env, nodes)
    env_util.update_deployment_info(seed_env, isolated)
    if isolated and len(nodes) > 1:
        isolate(nodes, seed_env)
    env_util.deploy_changes(seed_env, nodes)
    for node in nodes:
        controller_upgrade.ControllerUpgrade(
            node, seed_env, isolated=isolated).postdeploy()

def install_node(orig_id, seed_id, node_ids, isolated=False):
    env = environment_obj.Environment
    nodes = [node_obj.Node(node_id) for node_id in node_ids]
    if orig_id == seed_id:
        raise Exception("Original and seed environments have the same ID: %s",
                        orig_id)
    orig_env = env(orig_id)
    orig_node = env_util.get_one_controller(orig_env)
    seed_env = env(seed_id)
    seed_env.assign(nodes, orig_node.data['roles'])
    for node in nodes:
        disk_info_fixture = orig_node.get_attribute('disks')
        nic_info_fixture = orig_node.get_attribute('interfaces')
        update_node_settings(node, disk_info_fixture, nic_info_fixture)
    env_util.provision_nodes(seed_env, nodes)
    for node in nodes:
        # FIXME: properly call all handlers all over the place
        controller_upgrade.ControllerUpgrade(
            node, seed_env, isolated=isolated).predeploy()
    if len(nodes) > 1:
        isolate(nodes, seed_env)
    env_util.deploy_changes(seed_env, nodes)
    for node in nodes:
        controller_upgrade.ControllerUpgrade(
            node, seed_env, isolated=isolated).postdeploy()

def postdeploy(self):
    self.restore_iscsi_initiator_info()
    controller = env_util.get_one_controller(self.env)
    # FIXME: Add more correct handling of case
    # when node may have not full name in services data
    try:
        ssh.call(
            ["sh", "-c", ". /root/openrc; "
             "nova service-enable {0} nova-compute".format(
                 self.node.data['fqdn'])],
            node=controller,
        )
    except subprocess.CalledProcessError as exc:
        LOG.warn("Cannot start service 'nova-compute' on {0} "
                 "by reason: {1}. Try again".format(
                     self.node.data['fqdn'], exc))
        ssh.call(
            ["sh", "-c", ". /root/openrc; "
             "nova service-enable {0} nova-compute".format(
                 self.node.data['fqdn'].split('.', 1)[0])],
            node=controller,
        )
    orig_version = self.orig_env.data["fuel_version"]
    if orig_version == "6.1":
        openstack_release = magic_consts.VERSIONS[orig_version]
        node_util.add_compute_upgrade_levels(self.node, openstack_release)
        ssh.call(["service", "nova-compute", "restart"], node=self.node)

def cleanup_environment(env_id):
    env = objects.Environment(env_id)
    nodes = env.get_all_nodes()
    for node in nodes:
        node_util.remove_compute_upgrade_levels(node)
    controller = env_util.get_one_controller(env)
    sftp = ssh.sftp(controller)
    admin_pass = env_util.get_admin_password(env, controller)
    script_filename = 'clean_env.py'
    with ssh.tempdir(controller) as tempdir:
        script_src_filename = os.path.join(magic_consts.CWD, "helpers",
                                           script_filename)
        script_dst_filename = os.path.join(tempdir, script_filename)
        sftp.put(script_src_filename, script_dst_filename)
        command = [
            'sh', '-c',
            '. /root/openrc; export OS_PASSWORD={0}; python {1}'.format(
                admin_pass, script_dst_filename),
        ]
        with ssh.popen(command, node=controller, stdin=ssh.PIPE) as proc:
            roles = ["controller", "compute"]
            for node in env_util.get_nodes(env, roles):
                data = "{0}\n{1}\n".format(node.data['fqdn'].split('.')[0],
                                           node.data['fqdn'])
                proc.stdin.write(data)

def postdeploy(self):
    self.restore_iscsi_initiator_info()
    controller = env_util.get_one_controller(self.env)
    # FIXME: Add more correct handling of case
    # when node may have not full name in services data
    try:
        ssh.call(
            [
                "sh", "-c",
                ". /root/openrc; "
                "nova service-enable {0} nova-compute".format(
                    self.node.data['fqdn'])
            ],
            node=controller,
        )
    except subprocess.CalledProcessError as exc:
        LOG.warn("Cannot start service 'nova-compute' on {0} "
                 "by reason: {1}. Try again".format(
                     self.node.data['fqdn'], exc))
        ssh.call(
            [
                "sh", "-c",
                ". /root/openrc; "
                "nova service-enable {0} nova-compute".format(
                    self.node.data['fqdn'].split('.', 1)[0])
            ],
            node=controller,
        )
    orig_version = self.orig_env.data["fuel_version"]
    if orig_version == "6.1":
        openstack_release = magic_consts.VERSIONS[orig_version]
        node_util.add_compute_upgrade_levels(self.node, openstack_release)
        ssh.call(["service", "nova-compute", "restart"], node=self.node)

def upgrade_osd(orig_env_id, seed_env_id, user, password):
    with fuel_client.set_auth_context(
            backup_restore.NailgunCredentialsContext(user, password)):
        orig_env = env_obj.Environment(orig_env_id)
        nodes = list(env.get_nodes(orig_env, ["ceph-osd"]))
        seed_env = env_obj.Environment(seed_env_id)
        preference_priority = get_repo_highest_priority(orig_env)
        seed_repos = get_repos_for_upgrade(orig_env, seed_env)
        if not nodes:
            LOG.info("Nothing to upgrade")
            return
        controller = env.get_one_controller(seed_env)
        if is_same_versions_on_mon_and_osd(controller):
            LOG.warn("MONs and OSDs have the same version, "
                     "nothing to upgrade.")
            return
        hostnames = [n.data['hostname'] for n in nodes]
        with applied_repos(nodes, preference_priority + 1, seed_repos):
            call_node = nodes[0]
            ssh.call(["ceph", "osd", "set", "noout"], node=call_node)
            ssh.call(['ceph-deploy', 'install', '--release', 'hammer'] +
                     hostnames, node=call_node)
            for node in nodes:
                ssh.call(["restart", "ceph-osd-all"], node=node)
            ssh.call(["ceph", "osd", "unset", "noout"], node=call_node)
        waiting_until_ceph_up(controller)
        if not is_same_versions_on_mon_and_osd(controller):
            msg = ("OSDs not upgraded up to MONs version, "
                   "please fix the problem")
            LOG.error(msg)
            raise Exception(msg)

def install_node(orig_id, seed_id, node_ids, isolated=False, networks=None):
    if orig_id == seed_id:
        raise Exception("Original and seed environments have the same ID: %s",
                        orig_id)
    orig_env = environment_obj.Environment(orig_id)
    seed_env = environment_obj.Environment(seed_id)
    check_networks(orig_env, seed_env, networks)
    nodes = [node_obj.Node(node_id) for node_id in node_ids]
    orig_node = env_util.get_one_controller(orig_env)
    seed_env.assign(nodes, orig_node.data['roles'])
    for node in nodes:
        disk_info_fixture = orig_node.get_attribute('disks')
        nic_info_fixture = orig_node.get_attribute('interfaces')
        update_node_settings(node, disk_info_fixture, nic_info_fixture)
    if networks:
        env_util.clone_ips(orig_id, networks)
    LOG.info("Nodes reboot in progress. Please wait...")
    node_util.reboot_nodes(nodes, timeout=180 * 60)
    node_util.wait_for_mcollective_start(nodes)
    env_util.provision_nodes(seed_env, nodes)
    env_util.update_deployment_info(seed_env, isolated)
    if isolated and len(nodes) > 1:
        isolate(nodes, seed_env)
    env_util.deploy_changes(seed_env, nodes)
    for node in nodes:
        controller_upgrade.ControllerUpgrade(
            node, seed_env, isolated=isolated).postdeploy()

def extract_mon_conf_files(orig_env, tar_filename):
    controller = env_util.get_one_controller(orig_env)
    conf_filename = get_ceph_conf_filename(controller)
    conf_dir = os.path.dirname(conf_filename)
    hostname = short_hostname(node_util.get_hostname_remotely(controller))
    db_path = "/var/lib/ceph/mon/ceph-{0}".format(hostname)
    node_util.tar_files(tar_filename, controller, conf_dir, db_path)
    return conf_filename, db_path

def postdeploy(self):
    controller = env_util.get_one_controller(self.env)
    ssh.call(
        ["sh", "-c", ". /root/openrc; "
         "nova service-enable node-{0} nova-compute".format(
             self.node.data['id'])],
        node=controller,
    )

def get_databases(env):
    node = env_util.get_one_controller(env)
    with ssh.popen(['mysql', '--batch', '--skip-column-names'],
                   stdin=ssh.PIPE, stdout=ssh.PIPE, node=node) as proc:
        proc.stdin.write('SHOW DATABASES')
        out = proc.communicate()[0]
    return out.splitlines()

def extract_mon_conf_files(orig_env, tar_filename):
    controller = env_util.get_one_controller(orig_env)
    conf_filename = ceph.get_ceph_conf_filename(controller)
    conf_dir = os.path.dirname(conf_filename)
    hostname = short_hostname(
        node_util.get_hostname_remotely(controller))
    db_path = "/var/lib/ceph/mon/ceph-{0}".format(hostname)
    node_util.tar_files(tar_filename, controller, conf_dir, db_path)
    return conf_filename, db_path

def db_sync(env):
    node = env_util.get_one_controller(env)
    ssh.call(['keystone-manage', 'db_sync'], node=node, parse_levels=True)
    ssh.call(['nova-manage', 'db', 'sync'], node=node, parse_levels=True)
    ssh.call(['heat-manage', 'db_sync'], node=node, parse_levels=True)
    ssh.call(['glance-manage', 'db_sync'], node=node, parse_levels=True)
    ssh.call(['neutron-db-manage', '--config-file=/etc/neutron/neutron.conf',
              'upgrade', 'head'], node=node,
             parse_levels='^(?P<level>[A-Z]+)')
    ssh.call(['cinder-manage', 'db', 'sync'], node=node, parse_levels=True)

def cleanup_environment(env_id):
    env = objects.Environment(env_id)
    controller = env_util.get_one_controller(env)
    nodes = env_util.get_nodes(env, ['controller', 'compute'])
    for node in nodes:
        node_util.remove_compute_upgrade_levels(node)
        node_util.restart_nova_services(node)
        clean_services_for_node(controller, node)

def postdeploy(self):
    controller = env_util.get_one_controller(self.env)
    ssh.call(
        [
            "sh", "-c",
            ". /root/openrc; "
            "nova service-enable node-{0} nova-compute".format(
                self.node.data['id'])
        ],
        node=controller,
    )

def mysqldump_from_env(env, dbs, fname):
    node = env_util.get_one_controller(env)
    cmd = [
        'bash', '-c',
        'set -o pipefail; ' +  # We want to fail if mysqldump fails
        'mysqldump --add-drop-database --lock-all-tables '
        '--databases {0}'.format(' '.join(dbs)) +
        ' | gzip',
    ]
    with ssh.popen(cmd, stdout=ssh.PIPE, node=node) as proc:
        with open(fname, 'wb') as local_file:
            shutil.copyfileobj(proc.stdout, local_file)

def write_service_tenant_id(env_id):
    env = environment_obj.Environment(env_id)
    node = env_util.get_one_controller(env)
    tenant_id, _ = ssh.call(["bash", "-c", ". /root/openrc;",
                             "keystone tenant-list | ",
                             "awk -F\| '\$2 ~ /id/{print \$3}' | tr -d \ "],
                            stdout=ssh.PIPE, node=node)
    tenant_file = '%s/env-%s-service-tenant-id' % (magic_consts.FUEL_CACHE,
                                                   str(env_id))
    with open(tenant_file, 'w') as f:
        f.write(tenant_id)

def stop_corosync_services(env):
    node = env_util.get_one_controller(env)
    status_out = ssh.call_output(['crm', 'status'], node=node)
    for service in parse_crm_status(status_out):
        while True:
            try:
                ssh.call(['crm', 'resource', 'stop', service], node=node)
            except subprocess.CalledProcessError:
                pass
            else:
                break
            time.sleep(60)

def wait_for_corosync_services_sync(env, resource_list, status,
                                    timeout=1200, check_freq=20):
    status_bool = status == 'start'
    node = env_util.get_one_controller(env)
    started_at = time.time()
    while True:
        crm_out = ssh.call_output(['crm_mon', '--as-xml'], node=node)
        if is_resources_synced(resource_list, crm_out, status_bool):
            return
        if time.time() - started_at >= timeout:
            raise Exception("Timeout waiting for corosync cluster for env %s"
                            " to be synced" % env.id)
        time.sleep(check_freq)

def evacuate_host(self):
    controller = env_util.get_one_controller(self.env)
    with ssh.tempdir(controller) as tempdir:
        local_path = os.path.join(magic_consts.CWD, 'bin',
                                  'host_evacuation.sh')
        remote_path = os.path.join(tempdir, 'host_evacuation.sh')
        sftp = ssh.sftp(controller)
        sftp.put(local_path, remote_path)
        sftp.chmod(remote_path, stat.S_IRWXO)
        ssh.call(
            [remote_path, 'node-{0}'.format(self.node.data['id'])],
            node=controller,
        )

def write_service_tenant_id(env_id):
    env = environment_obj.Environment(env_id)
    node = env_util.get_one_controller(env)
    tenant_id, _ = ssh.call([
        "bash", "-c", ". /root/openrc;",
        "keystone tenant-list | ",
        "awk -F\| '\$2 ~ /id/{print \$3}' | tr -d \ "
    ], stdout=ssh.PIPE, node=node)
    tenant_file = '%s/env-%s-service-tenant-id' % (magic_consts.FUEL_CACHE,
                                                   str(env_id))
    with open(tenant_file, 'w') as f:
        f.write(tenant_id)

def evacuate_host(self):
    controller = env_util.get_one_controller(self.env)
    with ssh.tempdir(controller) as tempdir:
        local_path = os.path.join(
            magic_consts.CWD, 'bin', 'host_evacuation.sh')
        remote_path = os.path.join(tempdir, 'host_evacuation.sh')
        sftp = ssh.sftp(controller)
        sftp.put(local_path, remote_path)
        sftp.chmod(remote_path, stat.S_IRWXO)
        ssh.call(
            [remote_path, 'node-{0}'.format(self.node.data['id'])],
            node=controller,
        )

def prepare(orig_id, seed_id):
    orig_env = environment_obj.Environment(orig_id)
    seed_env = environment_obj.Environment(seed_id)
    controller = env_util.get_one_controller(seed_env)
    with tempfile.NamedTemporaryFile() as temp:
        db.mysqldump_from_env(orig_env, ['keystone'], temp.name)
        db.mysqldump_restore_to_env(seed_env, temp.name)
    ssh.call(['keystone-manage', 'db_sync'], node=controller,
             parse_levels=True)
    for controller in env_util.get_controllers(seed_env):
        ssh.call(['service', 'memcached', 'restart'], node=controller)

def shutoff_vms(self):
    controller = env_util.get_one_controller(self.env)
    node_fqdn = node_util.get_nova_node_handle(self.node)
    if nova.do_nova_instances_exist(controller, node_fqdn, "ERROR"):
        raise Exception(
            "There are instances in ERROR state on {hostname}, "
            "please fix this problem and start the upgrade_node "
            "command again".format(hostname=node_fqdn))
    for instance_id in nova.get_active_instances(controller, node_fqdn):
        nova.run_nova_cmd(
            ["nova", "stop", instance_id], controller, output=False)
    nova.waiting_for_status_completed(controller, node_fqdn, "ACTIVE")

def mysqldump_from_env(env):
    node = env_util.get_one_controller(env)
    local_fname = os.path.join(magic_consts.FUEL_CACHE, 'dbs.original.sql.gz')
    with ssh.popen(['sh', '-c', 'mysqldump --add-drop-database'
                    ' --lock-all-tables --databases %s | gzip' %
                    (' '.join(magic_consts.OS_SERVICES),)],
                   stdout=ssh.PIPE, node=node) as proc:
        with open(local_fname, 'wb') as local_file:
            shutil.copyfileobj(proc.stdout, local_file)
    local_fname2 = os.path.join(
        magic_consts.FUEL_CACHE,
        'dbs.original.cluster_%s.sql.gz' % (env.data['id'],),
    )
    shutil.copy(local_fname, local_fname2)
    return local_fname

def postdeploy(self):
    self.restore_iscsi_initiator_info()
    controller = env_util.get_one_controller(self.env)
    # FIXME: Add more correct handling of case
    # when node may have not full name in services data
    try:
        call_host = self.node.data['fqdn']
        nova.run_nova_cmd(
            ["nova", "service-enable", call_host, "nova-compute"],
            controller, False)
    except subprocess.CalledProcessError as exc:
        LOG.warn("Cannot start service 'nova-compute' on {0} "
                 "by reason: {1}. Try again".format(
                     self.node.data['fqdn'], exc))
        call_host = self.node.data['fqdn'].split('.', 1)[0]
        nova.run_nova_cmd(
            ["nova", "service-enable", call_host, "nova-compute"],
            controller, False)

def mysqldump_from_env(env):
    node = env_util.get_one_controller(env)
    local_fname = os.path.join(magic_consts.FUEL_CACHE, 'dbs.original.sql.gz')
    cmd = [
        'bash', '-c',
        'set -o pipefail; ' +  # We want to fail if mysqldump fails
        'mysqldump --add-drop-database --lock-all-tables '
        '--databases {0}'.format(' '.join(magic_consts.OS_SERVICES)) +
        ' | gzip',
    ]
    with ssh.popen(cmd, stdout=ssh.PIPE, node=node) as proc:
        with open(local_fname, 'wb') as local_file:
            shutil.copyfileobj(proc.stdout, local_file)
    local_fname2 = os.path.join(
        magic_consts.FUEL_CACHE,
        'dbs.original.cluster_%s.sql.gz' % (env.data['id'],),
    )
    shutil.copy(local_fname, local_fname2)
    return local_fname

def nova_migrate_flavor_data(env, attempts=20, attempt_delay=30):
    node = env_util.get_one_controller(env)
    for i in xrange(attempts):
        output = ssh.call_output(['nova-manage', 'db', 'migrate_flavor_data'],
                                 node=node, parse_levels=True)
        match = FLAVOR_STATUS_RE.match(output)
        if match is None:
            raise Exception(
                "The format of the migrate_flavor_data command was changed: "
                "'{0}'".format(output))
        params = match.groupdict()
        matched = int(params["matched"])
        completed = int(params["completed"])
        if matched == 0 or matched == completed:
            LOG.info("All flavors were successfully migrated.")
            return
        LOG.debug("Trying to migrate flavors data, iteration %s: %s matches, "
                  "%s completed.", i, matched, completed)
        time.sleep(attempt_delay)
    raise Exception(
        "After {0} attempts flavors data migration is still not completed."
        .format(attempts))

def manage_corosync_services(env, status):
    node = env_util.get_one_controller(env)
    status_out = ssh.call_output(['cibadmin', '--query', '--scope',
                                  'resources'], node=node)
    services_list = []
    for res in get_crm_services(status_out):
        if any(service in res for service in _default_exclude_services):
            continue
        services_list.append(res)
    for service in services_list:
        while True:
            try:
                ssh.call(['crm', 'resource', status, service], node=node)
            except subprocess.CalledProcessError:
                # Sometimes pacemaker rejects requests that it is not able
                # to process. Sleep was added to mitigate this risk.
                time.sleep(1)
            else:
                break
    wait_for_corosync_services_sync(env, services_list, status)

def cleanup_environment(env_id):
    env = objects.Environment(env_id)
    controller = env_util.get_one_controller(env)
    sftp = ssh.sftp(controller)
    script_filename = 'clean_env.py'
    script_dst_filename = '/tmp/{0}'.format(script_filename)
    sftp.put(
        os.path.join(magic_consts.CWD, "helpers/{0}".format(script_filename)),
        script_dst_filename,
    )
    command = ['sh', '-c',
               '. /root/openrc; export OS_PASSWORD=admin; python ' +
               script_dst_filename]
    with ssh.popen(command, node=controller, stdin=ssh.PIPE) as proc:
        roles = ["controller", "compute"]
        for node in env_util.get_nodes(env, roles):
            proc.stdin.write(node.data['fqdn'] + "\n")
    ssh.call(['rm', '-f', script_dst_filename], node=controller)

def start_cluster(env):
    major_version = env.data['fuel_version'].split('.')[0]
    cmds = []
    if int(major_version) < 6:
        cmds = [['service', 'corosync', 'start']]
    else:
        cmds = [['pcs', 'cluster', 'start']]
    controllers = list(env_util.get_controllers(env))
    for node in controllers:
        for cmd in cmds:
            ssh.call(cmd, node=node)
    # When we start the cluster we should wait until the resources from the
    # `_default_exclude_services` constant are up and running. Note that we
    # don't touch these resources in the stop/start corosync resource
    # methods at all.
    node = env_util.get_one_controller(env)
    status_out = ssh.call_output(['cibadmin', '--query', '--scope',
                                  'resources'], node=node)
    services_list = []
    for res in get_crm_services(status_out):
        if any(service in res for service in _default_exclude_services):
            services_list.append(res)
    wait_for_corosync_services_sync(env, services_list, 'start')

def evacuate_host(self):
    controller = env_util.get_one_controller(self.env)
    enabled_computes, disabled_computes = nova.get_compute_lists(controller)
    node_fqdn = node_util.get_nova_node_handle(self.node)
    if [node_fqdn] == enabled_computes:
        raise Exception("You are trying to disable the last enabled "
                        "nova-compute service on {hostname} in the cluster. "
                        "This would make host evacuation impossible. "
                        "Fix this problem and run the upgrade-node "
                        "command again".format(hostname=node_fqdn))
    if nova.do_nova_instances_exist(controller, node_fqdn, "ERROR"):
        raise Exception(
            "There are instances in ERROR state on {hostname}, "
            "please fix this problem and start the upgrade_node "
            "command again".format(hostname=node_fqdn))
    if node_fqdn in disabled_computes:
        LOG.warn("Node {0} already disabled".format(node_fqdn))
    else:
        nova.run_nova_cmd(
            ["nova", "service-disable", node_fqdn, "nova-compute"],
            controller, False)
    for instance_id in nova.get_active_instances(controller, node_fqdn):
        nova.run_nova_cmd(
            ["nova", "live-migration", instance_id], controller, False)
        nova.waiting_for_status_completed(controller, node_fqdn, "MIGRATING")
    if nova.do_nova_instances_exist(controller, node_fqdn):
        raise Exception(
            "There are instances on {hostname} after host-evacuation, "
            "please fix this problem and start the upgrade_node "
            "command again".format(hostname=node_fqdn))

def unset_osd_noout(env):
    controller = env_util.get_one_controller(env)
    ssh.call(['ceph', 'osd', 'unset', 'noout'], node=controller)

def unset_osd_noout(env): controller = env_util.get_one_controller(env) ssh.call(["ceph", "osd", "unset", "noout"], node=controller)
def mysqldump_restore_to_env(env, fname):
    node = env_util.get_one_controller(env)
    with open(fname, 'rb') as local_file:
        with ssh.popen(['sh', '-c', 'zcat | mysql'],
                       stdin=ssh.PIPE, node=node) as proc:
            shutil.copyfileobj(local_file, proc.stdin)

def restart_radosgw(env):
    node = env_util.get_one_controller(env)
    ssh.call(["service", "radosgw", "restart"], node=node)