def push_keys_to_host(ctx, config, public_key, private_key):
    """ Push keys to all hosts """
    log.info('generated public key {pub_key}'.format(pub_key=public_key))

    # add an entry for all hosts in ctx to auth_keys_data
    auth_keys_data = ''

    for inner_host in ctx.cluster.remotes.keys():
        inner_username, inner_hostname = str(inner_host).split('@')
        # create a 'user@hostname' string using our fake hostname
        fake_hostname = '{user}@{host}'.format(user=ssh_keys_user,
                                               host=str(inner_hostname))
        auth_keys_data += '\nssh-rsa {pub_key} {user_host}\n'.format(
            pub_key=public_key, user_host=fake_hostname)

    key_backup_files = dict()
    # for each host in ctx, add keys for all other hosts
    for remote in ctx.cluster.remotes:
        username, hostname = str(remote).split('@')
        if "" == username or "" == hostname:
            continue
        else:
            log.info('pushing keys to {host} for {user}'.format(
                host=hostname, user=username))

            # add the private key
            priv_key_file = '/home/{user}/.ssh/id_rsa'.format(user=username)
            priv_key_data = '{priv_key}'.format(priv_key=private_key)
            misc.delete_file(remote, priv_key_file, force=True)
            # Hadoop requires that .ssh/id_rsa have permissions of '500'
            misc.create_file(remote, priv_key_file, priv_key_data, str(500))

            # then the public key
            pub_key_file = '/home/{user}/.ssh/id_rsa.pub'.format(user=username)
            pub_key_data = 'ssh-rsa {pub_key} {user_host}'.format(
                pub_key=public_key, user_host=str(remote))
            misc.delete_file(remote, pub_key_file, force=True)
            misc.create_file(remote, pub_key_file, pub_key_data)

            # add appropriate entries to the authorized_keys file for this host
            auth_keys_file = '/home/{user}/.ssh/authorized_keys'.format(
                user=username)
            key_backup_files[remote] = backup_file(remote, auth_keys_file)
            misc.append_lines_to_file(remote, auth_keys_file, auth_keys_data)

    try:
        yield

    finally:
        # cleanup the keys
        log.info("Cleaning up SSH keys")
        cleanup_added_key(ctx, key_backup_files, auth_keys_file)
def push_keys_to_host(ctx, config, public_key, private_key):
    """ Push keys to all hosts """
    log.info('generated public key {pub_key}'.format(pub_key=public_key))

    # add an entry for all hosts in ctx to auth_keys_data
    auth_keys_data = ''

    for inner_host in ctx.cluster.remotes.iterkeys():
        inner_username, inner_hostname = str(inner_host).split('@')
        # create a 'user@hostname' string using our fake hostname
        fake_hostname = '{user}@{host}'.format(user=ssh_keys_user,
                                               host=str(inner_hostname))
        auth_keys_data += '\nssh-rsa {pub_key} {user_host}\n'.format(
            pub_key=public_key, user_host=fake_hostname)

    # for each host in ctx, add keys for all other hosts
    for remote in ctx.cluster.remotes:
        username, hostname = str(remote).split('@')
        if "" == username or "" == hostname:
            continue
        else:
            log.info('pushing keys to {host} for {user}'.format(
                host=hostname, user=username))

            # add the private key
            priv_key_file = '/home/{user}/.ssh/id_rsa'.format(user=username)
            priv_key_data = '{priv_key}'.format(priv_key=private_key)
            misc.delete_file(remote, priv_key_file, force=True)
            # Hadoop requires that .ssh/id_rsa have permissions of '500'
            misc.create_file(remote, priv_key_file, priv_key_data, str(500))

            # then the public key
            pub_key_file = '/home/{user}/.ssh/id_rsa.pub'.format(user=username)
            pub_key_data = 'ssh-rsa {pub_key} {user_host}'.format(
                pub_key=public_key, user_host=str(remote))
            misc.delete_file(remote, pub_key_file, force=True)
            misc.create_file(remote, pub_key_file, pub_key_data)

            # add appropriate entries to the authorized_keys file for this host
            auth_keys_file = '/home/{user}/.ssh/authorized_keys'.format(
                user=username)
            backup_file(remote, auth_keys_file)
            lines = '#TEUTHOLOGY_START\n' + auth_keys_data + '\n#TEUTHOLOGY_END\n'
            misc.append_lines_to_file(remote, auth_keys_file, lines)

    try:
        yield

    finally:
        # cleanup the keys
        log.info("Cleaning up SSH keys")
        cleanup_added_key(ctx)
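Both push_keys_to_host variants above call a backup_file helper that is not shown in this
section. The following is only a minimal sketch of what such a helper could look like,
assuming it just needs to copy authorized_keys aside on the remote and return the backup
path; the name of the backup suffix, the return value, and the use of cp are assumptions,
not the real teuthology implementation.

def backup_file(remote, path, sudo=False):
    """Hypothetical helper: copy ``path`` on ``remote`` to a ``.backup`` copy
    and return the backup path so it can later be restored or removed."""
    backup_path = path + '.backup'
    args = ['cp', '-a', path, backup_path]
    if sudo:
        args = ['sudo'] + args
    # check_status=False: the file may not exist yet on a freshly provisioned host
    remote.run(args=args, check_status=False)
    return backup_path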
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""
    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                if var == 'testing':
                    ceph_branch = '--{var}'.format(var=var)
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        node_dev_list = []
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_roles(ctx, config, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_roles(ctx, config, 'mon')
        mon_nodes = " ".join(mon_node)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        install_nodes = './ceph-deploy install ' + ceph_branch + " " + all_nodes
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(ctx, config, new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
        first_mon = teuthology.get_first_mon(ctx, config)
        (remote,) = ctx.cluster.only(first_mon).remotes.keys()

        lines = None
        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(remote, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(remote, conf_path, lines,
                                                    sudo=True)

        estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(ctx, config, mon_create_nodes)

        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while (estatus_gather != 0):
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = 'ceph-deploy was not able to gatherkeys after 15 minutes'
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(ctx, config,
                                                    mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        osd_create_cmd = './ceph-deploy osd create --zap-disk '
        for d in node_dev_list:
            if config.get('dmcrypt') is not None:
                osd_create_cmd_d = osd_create_cmd + '--dmcrypt' + " " + d
            else:
                osd_create_cmd_d = osd_create_cmd + d
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                disks = []
                disks = d.split(':')
                dev_disk = disks[0] + ":" + disks[1]
                j_disk = disks[0] + ":" + disks[2]
                zap_disk = './ceph-deploy disk zap ' + dev_disk + " " + j_disk
                execute_ceph_deploy(ctx, config, zap_disk)
                estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=[
                            'cd', '{tdir}'.format(tdir=testdir), run.Raw('&&'),
                            'sudo', 'bash', '-c', run.Raw('"'),
                            'ceph', 'auth', 'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow', 'mon', 'allow *', 'osd', 'allow *',
                            run.Raw('>'), client_keyring, run.Raw('"'),
                        ],
                    )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    teuthology.sudo_write_file(
                        remote=remot, path=client_keyring,
                        data=key_data, perms='0644')
                    teuthology.sudo_write_file(
                        remote=remot, path=admin_keyring_path,
                        data=admin_keyring, perms='0644')
                    teuthology.sudo_write_file(
                        remote=remot, path=conf_path,
                        data=conf_data, perms='0644')
        else:
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    finally:
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop'])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(args=['sudo', 'status', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'status'],
                        check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'find', '/var/log/ceph', '-name', '*.log',
                        '-print0', run.Raw('|'), 'sudo', 'xargs', '-0',
                        '--no-run-if-empty', '--', 'gzip', '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(ctx, config, purgedata_nodes)
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""
    log.info("Building ceph cluster using ceph-deploy...")
    testdir = teuthology.get_testdir(ctx)
    ceph_branch = None
    if config.get("branch") is not None:
        cbranch = config.get("branch")
        for var, val in cbranch.iteritems():
            if var == "testing":
                ceph_branch = "--{var}".format(var=var)
            ceph_branch = "--{var}={val}".format(var=var, val=val)
    node_dev_list = []
    all_nodes = get_all_nodes(ctx, config)
    mds_nodes = get_nodes_using_roles(ctx, config, "mds")
    mds_nodes = " ".join(mds_nodes)
    mon_node = get_nodes_using_roles(ctx, config, "mon")
    mon_nodes = " ".join(mon_node)
    new_mon = "./ceph-deploy new" + " " + mon_nodes
    install_nodes = "./ceph-deploy install " + ceph_branch + " " + all_nodes
    purge_nodes = "./ceph-deploy purge" + " " + all_nodes
    purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes
    mon_hostname = mon_nodes.split(" ")[0]
    mon_hostname = str(mon_hostname)
    gather_keys = "./ceph-deploy gatherkeys" + " " + mon_hostname
    deploy_mds = "./ceph-deploy mds create" + " " + mds_nodes
    no_of_osds = 0

    if mon_nodes is None:
        raise RuntimeError("no monitor nodes in the config file")

    estatus_new = execute_ceph_deploy(ctx, config, new_mon)
    if estatus_new != 0:
        raise RuntimeError("ceph-deploy: new command failed")

    log.info("adding config inputs...")
    testdir = teuthology.get_testdir(ctx)
    conf_path = "{tdir}/ceph-deploy/ceph.conf".format(tdir=testdir)
    first_mon = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(first_mon).remotes.keys()

    lines = None
    if config.get("conf") is not None:
        confp = config.get("conf")
        for section, keys in confp.iteritems():
            lines = "[{section}]\n".format(section=section)
            teuthology.append_lines_to_file(remote, conf_path, lines, sudo=True)
            for key, value in keys.iteritems():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = "{key} = {value}\n".format(key=key, value=value)
                teuthology.append_lines_to_file(remote, conf_path, lines,
                                                sudo=True)

    estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
    if estatus_install != 0:
        raise RuntimeError("ceph-deploy: Failed to install ceph")

    mon_no = None
    mon_no = config.get("mon_initial_members")
    if mon_no is not None:
        i = 0
        mon1 = []
        while i < mon_no:
            mon1.append(mon_node[i])
            i = i + 1
        initial_mons = " ".join(mon1)
        for k in range(mon_no, len(mon_node)):
            mon_create_nodes = "./ceph-deploy mon create" + " " + \
                initial_mons + " " + mon_node[k]
            estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
            if estatus_mon != 0:
                raise RuntimeError("ceph-deploy: Failed to create monitor")
    else:
        mon_create_nodes = "./ceph-deploy mon create" + " " + mon_nodes
        estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
        if estatus_mon != 0:
            raise RuntimeError("ceph-deploy: Failed to create monitors")

    estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
    while estatus_gather != 0:
        # mon_create_nodes = './ceph-deploy mon create'+" "+mon_node[0]
        # execute_ceph_deploy(ctx, config, mon_create_nodes)
        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)

    if mds_nodes:
        estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
        if estatus_mds != 0:
            raise RuntimeError("ceph-deploy: Failed to deploy mds")

    if config.get("test_mon_destroy") is not None:
        for d in range(1, len(mon_node)):
            mon_destroy_nodes = "./ceph-deploy mon destroy" + " " + mon_node[d]
            estatus_mon_d = execute_ceph_deploy(ctx, config, mon_destroy_nodes)
            if estatus_mon_d != 0:
                raise RuntimeError("ceph-deploy: Failed to delete monitor")

    node_dev_list = get_dev_for_osd(ctx, config)
    for d in node_dev_list:
        osd_create_cmds = "./ceph-deploy osd create --zap-disk" + " " + d
        estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
        if estatus_osd == 0:
            log.info("successfully created osd")
            no_of_osds += 1
        else:
            zap_disk = "./ceph-deploy disk zap" + " " + d
            execute_ceph_deploy(ctx, config, zap_disk)
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
            if estatus_osd == 0:
                log.info("successfully created osd")
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

    if config.get("wait-for-healthy", True) and no_of_osds >= 2:
        is_healthy(ctx=ctx, config=None)

        log.info("Setting up client nodes...")
        conf_path = "/etc/ceph/ceph.conf"
        admin_keyring_path = "/etc/ceph/ceph.client.admin.keyring"
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = teuthology.get_file(remote=mon0_remote, path=conf_path,
                                        sudo=True)
        admin_keyring = teuthology.get_file(remote=mon0_remote,
                                            path=admin_keyring_path, sudo=True)

        clients = ctx.cluster.only(teuthology.is_type("client"))
        for remot, roles_for_host in clients.remotes.iteritems():
            for id_ in teuthology.roles_of_type(roles_for_host, "client"):
                client_keyring = "/etc/ceph/ceph.client.{id}.keyring".format(id=id_)
                mon0_remote.run(
                    args=[
                        "cd", "{tdir}".format(tdir=testdir), run.Raw("&&"),
                        "sudo", "bash", "-c", run.Raw('"'),
                        "ceph", "auth", "get-or-create",
                        "client.{id}".format(id=id_),
                        "mds", "allow", "mon", "allow *", "osd", "allow *",
                        run.Raw(">"), client_keyring, run.Raw('"'),
                    ]
                )
                key_data = teuthology.get_file(remote=mon0_remote,
                                               path=client_keyring, sudo=True)
                teuthology.sudo_write_file(remote=remot, path=client_keyring,
                                           data=key_data, perms="0644")
                teuthology.sudo_write_file(remote=remot, path=admin_keyring_path,
                                           data=admin_keyring, perms="0644")
                teuthology.sudo_write_file(remote=remot, path=conf_path,
                                           data=conf_data, perms="0644")
    else:
        raise RuntimeError("The cluster is NOT operational due to insufficient OSDs")

    try:
        yield
    finally:
        log.info("Stopping ceph...")
        ctx.cluster.run(args=["sudo", "stop", "ceph-all", run.Raw("||"),
                              "sudo", "service", "ceph", "stop"])

        if ctx.archive is not None:
            # archive mon data, too
            log.info("Archiving mon data...")
            path = os.path.join(ctx.archive, "data")
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type("mon"))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith("mon."):
                        teuthology.pull_directory_tarball(
                            remote, "/var/lib/ceph/mon",
                            path + "/" + role + ".tgz")

            log.info("Compressing logs...")
            run.wait(
                ctx.cluster.run(
                    args=[
                        "sudo", "find", "/var/log/ceph", "-name", "*.log",
                        "-print0", run.Raw("|"), "sudo", "xargs", "-0",
                        "--no-run-if-empty", "--", "gzip", "--",
                    ],
                    wait=False,
                )
            )

            log.info("Archiving logs...")
            path = os.path.join(ctx.archive, "remote")
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, "/var/log/ceph",
                                          os.path.join(sub, "log"))

        log.info("Purging package...")
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info("Purging data...")
        execute_ceph_deploy(ctx, config, purgedata_nodes)
def cli_test(ctx, config):
    """
    Exercise the most commonly used ceph-deploy CLI commands, ensure they all
    work, and start up the init system.
    """
    log.info('Ceph-deploy Test')
    if config is None:
        config = {}
    test_branch = ''
    conf_dir = teuthology.get_testdir(ctx) + "/cdtest"

    def execute_cdeploy(admin, cmd, path):
        """Execute ceph-deploy commands, using either the git path or the repo path."""
        args = ['cd', conf_dir, run.Raw(';')]
        if path:
            args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
        else:
            args.append('ceph-deploy')
        args.append(run.Raw(cmd))
        ec = admin.run(args=args, check_status=False).exitstatus
        if ec != 0:
            raise RuntimeError(
                "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))

    if config.get('rhbuild'):
        path = None
    else:
        path = teuthology.get_testdir(ctx)
        # test on branch from config eg: wip-* , master or next etc
        # packages for all distro's should exist for wip*
        if ctx.config.get('branch'):
            branch = ctx.config.get('branch')
            test_branch = ' --dev={branch} '.format(branch=branch)
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    for node, role in mons.remotes.items():
        admin = node
        admin.run(args=['mkdir', conf_dir], check_status=False)
        nodename = admin.shortname
    system_type = teuthology.get_system_type(admin)
    if config.get('rhbuild'):
        admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
    log.info('system type is %s', system_type)
    osds = ctx.cluster.only(teuthology.is_type('osd'))

    for remote, roles in osds.remotes.items():
        devs = teuthology.get_scratch_devices(remote)
        log.info("roles %s", roles)
        if (len(devs) < 3):
            log.error(
                'Test needs minimum of 3 devices, only found %s',
                str(devs))
            raise RuntimeError("Needs minimum of 3 devices ")

    conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
    new_cmd = 'new ' + nodename
    execute_cdeploy(admin, new_cmd, path)
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.items():
            lines = '[{section}]\n'.format(section=section)
            teuthology.append_lines_to_file(admin, conf_path, lines, sudo=True)
            for key, value in keys.items():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                teuthology.append_lines_to_file(admin, conf_path, lines,
                                                sudo=True)
    new_mon_install = 'install {branch} --mon '.format(
        branch=test_branch) + nodename
    new_mgr_install = 'install {branch} --mgr '.format(
        branch=test_branch) + nodename
    new_osd_install = 'install {branch} --osd '.format(
        branch=test_branch) + nodename
    new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
    create_initial = 'mon create-initial '
    mgr_create = 'mgr create ' + nodename
    # either use create-keys or push command
    push_keys = 'admin ' + nodename
    execute_cdeploy(admin, new_mon_install, path)
    execute_cdeploy(admin, new_mgr_install, path)
    execute_cdeploy(admin, new_osd_install, path)
    execute_cdeploy(admin, new_admin, path)
    execute_cdeploy(admin, create_initial, path)
    execute_cdeploy(admin, mgr_create, path)
    execute_cdeploy(admin, push_keys, path)

    for i in range(3):
        zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
        prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
        execute_cdeploy(admin, zap_disk, path)
        execute_cdeploy(admin, prepare, path)

    log.info("list files for debugging purpose to check file permissions")
    admin.run(args=['ls', run.Raw('-lt'), conf_dir])
    remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
    r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
    out = r.stdout.getvalue()
    log.info('Ceph health: %s', out.rstrip('\n'))
    log.info("Waiting for cluster to become healthy")
    with contextutil.safe_while(sleep=10, tries=6,
                                action='check health') as proceed:
        while proceed():
            r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
            out = r.stdout.getvalue()
            if (out.split(None, 1)[0] == 'HEALTH_OK'):
                break
    rgw_install = 'install {branch} --rgw {node}'.format(
        branch=test_branch,
        node=nodename,
    )
    rgw_create = 'rgw create ' + nodename
    execute_cdeploy(admin, rgw_install, path)
    execute_cdeploy(admin, rgw_create, path)
    log.info('All ceph-deploy cli tests passed')
    try:
        yield
    finally:
        log.info("cleaning up")
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)
        for i in range(3):
            umount_dev = "{d}1".format(d=devs[i])
            r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
        cmd = 'purge ' + nodename
        execute_cdeploy(admin, cmd, path)
        cmd = 'purgedata ' + nodename
        execute_cdeploy(admin, cmd, path)
        log.info("Removing temporary dir")
        admin.run(
            args=['rm', run.Raw('-rf'), run.Raw(conf_dir)],
            check_status=False)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
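For reference, a minimal sketch of the kind of task configuration the cli_test variants
read; every key below is taken from a config.get() call in the function above, while the
concrete values are made-up placeholders rather than project defaults.

# Illustrative only: keys mirror the config.get() lookups in cli_test above;
# values are placeholders, not real defaults.
cli_test_example_config = {
    'rhbuild': None,   # set to a build string to install ceph-deploy via yum
    'conf': {          # extra ceph.conf entries appended before 'mon create-initial'
        'global': {'osd pool default size': 2},
    },
}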
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the
    # download task puts it.  Remember this here, because subsequently IDs will
    # change from those in the test config to those that ceph-deploy invents.

    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    def ceph_disk_osd_create(ctx, config):
        node_dev_list = get_dev_for_osd(ctx, config)
        no_of_osds = 0
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ' ' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            # first check for filestore, default is bluestore with ceph-deploy
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore '
            elif config.get('bluestore') is not None:
                osd_create_cmd += '--bluestore '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    def ceph_volume_osd_create(ctx, config):
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        no_of_osds = 0
        for remote in osds.remotes.keys():
            # all devs should be lvm
            osd_create_cmd = './ceph-deploy osd create --debug ' + \
                remote.shortname + ' '
            # default is bluestore so we just need config item for filestore
            roles = ctx.cluster.remotes[remote]
            dev_needed = len([role for role in roles
                              if role.startswith('osd')])
            all_devs = teuthology.get_scratch_devices(remote)
            log.info("node={n}, need_devs={d}, available={a}".format(
                n=remote.shortname,
                d=dev_needed,
                a=all_devs,
            ))
            devs = all_devs[0:dev_needed]
            # rest of the devices can be used for journal if required
            jdevs = dev_needed
            for device in devs:
                device_split = device.split('/')
                lv_device = device_split[-2] + '/' + device_split[-1]
                if config.get('filestore') is not None:
                    osd_create_cmd += '--filestore --data ' + lv_device + ' '
                    # filestore with ceph-volume also needs journal disk
                    try:
                        jdevice = all_devs.pop(jdevs)
                    except IndexError:
                        raise RuntimeError("No device available for \
                                            journal configuration")
                    jdevice_split = jdevice.split('/')
                    j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
                    osd_create_cmd += '--journal ' + j_lv
                else:
                    osd_create_cmd += ' --data ' + lv_device
                estatus_osd = execute_ceph_deploy(osd_create_cmd)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.items():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        # skip mgr based on config item
        # this is needed when test uses latest code to install old ceph
        # versions
        skip_mgr = config.get('skip-mgr', False)
        if not skip_mgr:
            mgr_nodes = get_nodes_using_role(ctx, 'mgr')
            mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        if not skip_mgr:
            mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.items():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path,
                                                lines, sudo=True)
                for key, value in keys.items():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        estatus_gather = execute_ceph_deploy(gather_keys)
        if estatus_gather != 0:
            raise RuntimeError("ceph-deploy: Failed during gather keys")

        # install admin key on mons (ceph-create-keys doesn't do this any more)
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.keys():
            execute_ceph_deploy('./ceph-deploy admin ' + remote.shortname)

        # create osd's
        if config.get('use-ceph-volume', False):
            no_of_osds = ceph_volume_osd_create(ctx, config)
        else:
            # this method will only work with ceph-deploy v1.5.39 or older
            no_of_osds = ceph_disk_osd_create(ctx, config)

        if not skip_mgr:
            execute_ceph_deploy(mgr_create)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.items():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=[
                            'cd', '{tdir}'.format(tdir=testdir), run.Raw('&&'),
                            'sudo', 'bash', '-c', run.Raw('"'),
                            'ceph', 'auth', 'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow', 'mon', 'allow *', 'osd', 'allow *',
                            run.Raw('>'), client_keyring, run.Raw('"'),
                        ],
                    )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    teuthology.sudo_write_file(
                        remote=remot, path=client_keyring,
                        data=key_data, perms='0644')
                    teuthology.sudo_write_file(
                        remote=remot, path=admin_keyring_path,
                        data=admin_keyring, perms='0644')
                    teuthology.sudo_write_file(
                        remote=remot, path=conf_path,
                        data=conf_data, perms='0644')

            if mds_nodes:
                log.info('Configuring CephFS...')
                Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")

        # create rbd pool
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'create', 'rbd', '128', '128'],
            check_status=False)
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'application', 'enable',
                'rbd', 'rbd', '--yes-i-really-mean-it'],
            check_status=False)
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)
        ctx.cluster.run(args=['sudo', 'systemctl', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.items():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'find', '/var/log/ceph', '-name', '*.log',
                        '-print0', run.Raw('|'), 'sudo', 'xargs', '-0',
                        '--no-run-if-empty', '--', 'gzip', '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)
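The task config keys this latest variant consumes are scattered through the config.get()
calls above; the sketch below simply collects them in one illustrative Python dict. The
values shown are made-up examples for readability, not project defaults.

# Illustrative only: each key appears in a config.get() call in the function above;
# the values are placeholder examples, not real defaults.
build_ceph_cluster_example_config = {
    'branch': {'dev': 'master'},        # becomes './ceph-deploy install --dev=master'
    'conf': {'global': {'osd pool default size': 2}},  # appended to ceph.conf
    'use-ceph-volume': False,           # ceph_volume_osd_create() vs ceph_disk_osd_create()
    'filestore': None,                  # set to create --filestore OSDs
    'bluestore': None,                  # set to force --bluestore OSDs (ceph-disk path)
    'dmcrypt': None,                    # set to create --dmcrypt OSDs
    'skip-mgr': False,                  # skip './ceph-deploy mgr create'
    'test_mon_destroy': None,           # set to exercise 'mon destroy'
    'wait-for-healthy': True,           # wait for a healthy cluster before client setup
    'only_mon': False,                  # tolerate a cluster without OSDs
    'keep_running': False,              # skip teardown in the finally block
}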
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""
    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                if var == 'testing':
                    ceph_branch = '--{var}'.format(var=var)
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        node_dev_list = []
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_roles(ctx, config, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_roles(ctx, config, 'mon')
        mon_nodes = " ".join(mon_node)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        install_nodes = './ceph-deploy install ' + ceph_branch + " " + all_nodes
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(ctx, config, new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
        first_mon = teuthology.get_first_mon(ctx, config)
        (remote, ) = ctx.cluster.only(first_mon).remotes.keys()

        lines = None
        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(remote, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(remote, conf_path, lines,
                                                    sudo=True)

        estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")

        mon_no = None
        mon_no = config.get('mon_initial_members')
        if mon_no is not None:
            i = 0
            mon1 = []
            while (i < mon_no):
                mon1.append(mon_node[i])
                i = i + 1
            initial_mons = " ".join(mon1)
            for k in range(mon_no, len(mon_node)):
                mon_create_nodes = './ceph-deploy mon create' + " " + \
                    initial_mons + " " + mon_node[k]
                estatus_mon = execute_ceph_deploy(ctx, config,
                                                  mon_create_nodes)
                if estatus_mon != 0:
                    raise RuntimeError("ceph-deploy: Failed to create monitor")
        else:
            mon_create_nodes = './ceph-deploy mon create-initial'
            estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
            if estatus_mon != 0:
                raise RuntimeError("ceph-deploy: Failed to create monitors")

        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while (estatus_gather != 0):
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = 'ceph-deploy was not able to gatherkeys after 15 minutes'
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(ctx, config,
                                                    mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        for d in node_dev_list:
            osd_create_cmds = './ceph-deploy osd create --zap-disk' + " " + d
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                zap_disk = './ceph-deploy disk zap' + " " + d
                execute_ceph_deploy(ctx, config, zap_disk)
                estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote, ) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=[
                            'cd', '{tdir}'.format(tdir=testdir), run.Raw('&&'),
                            'sudo', 'bash', '-c', run.Raw('"'),
                            'ceph', 'auth', 'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow', 'mon', 'allow *', 'osd', 'allow *',
                            run.Raw('>'), client_keyring, run.Raw('"'),
                        ],
                    )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    teuthology.sudo_write_file(remote=remot,
                                               path=client_keyring,
                                               data=key_data,
                                               perms='0644')
                    teuthology.sudo_write_file(remote=remot,
                                               path=admin_keyring_path,
                                               data=admin_keyring,
                                               perms='0644')
                    teuthology.sudo_write_file(remote=remot,
                                               path=conf_path,
                                               data=conf_data,
                                               perms='0644')
        else:
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    finally:
        log.info('Stopping ceph...')
        ctx.cluster.run(args=[
            'sudo', 'stop', 'ceph-all', run.Raw('||'),
            'sudo', 'service', 'ceph', 'stop'
        ])

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'find', '/var/log/ceph', '-name', '*.log',
                        '-print0', run.Raw('|'), 'sudo', 'xargs', '-0',
                        '--no-run-if-empty', '--', 'gzip', '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(ctx, config, purgedata_nodes)
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the
    # download task puts it.  Remember this here, because subsequently IDs will
    # change from those in the test config to those that ceph-deploy invents.
    (ceph_admin,) = ctx.cluster.only(
        teuthology.get_first_mon(ctx, config)).remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path,
                                                lines, sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        # create-keys is explicit now
        # http://tracker.ceph.com/issues/16036
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.iterkeys():
            remote.run(args=['sudo', 'ceph-create-keys', '--cluster', 'ceph',
                             '--id', remote.shortname])

        estatus_gather = execute_ceph_deploy(gather_keys)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=[
                            'cd', '{tdir}'.format(tdir=testdir), run.Raw('&&'),
                            'sudo', 'bash', '-c', run.Raw('"'),
                            'ceph', 'auth', 'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow', 'mon', 'allow *', 'osd', 'allow *',
                            run.Raw('>'), client_keyring, run.Raw('"'),
                        ],
                    )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    teuthology.sudo_write_file(
                        remote=remot, path=client_keyring,
                        data=key_data, perms='0644')
                    teuthology.sudo_write_file(
                        remote=remot, path=admin_keyring_path,
                        data=admin_keyring, perms='0644')
                    teuthology.sudo_write_file(
                        remote=remot, path=conf_path,
                        data=conf_data, perms='0644')

            if mds_nodes:
                log.info('Configuring CephFS...')
                ceph_fs = Filesystem(ctx)
                if not ceph_fs.legacy_configured():
                    ceph_fs.create()
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
                              'sudo', 'systemctl', 'stop', 'ceph.target'])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=[
                'sudo', 'status', 'ceph-all', run.Raw('||'),
                'sudo', 'service', 'ceph', 'status', run.Raw('||'),
                'sudo', 'systemctl', 'status', 'ceph.target'],
            check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'find', '/var/log/ceph', '-name', '*.log',
                        '-print0', run.Raw('|'), 'sudo', 'xargs', '-0',
                        '--no-run-if-empty', '--', 'gzip', '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)
def build_ceph_cluster(ctx, config):
    log.info('Building ceph cluster using ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_branch = None
    if config.get('branch') is not None:
        cbranch = config.get('branch')
        for var, val in cbranch.iteritems():
            if var == 'testing':
                ceph_branch = '--{var}'.format(var=var)
            ceph_branch = '--{var}={val}'.format(var=var, val=val)
    node_dev_list = []
    all_nodes = get_all_nodes(ctx, config)
    mds_nodes = get_nodes_using_roles(ctx, config, 'mds')
    mds_nodes = " ".join(mds_nodes)
    mon_node = get_nodes_using_roles(ctx, config, 'mon')
    mon_nodes = " ".join(mon_node)
    new_mon = './ceph-deploy new' + " " + mon_nodes
    install_nodes = './ceph-deploy install ' + ceph_branch + " " + all_nodes
    purge_nodes = './ceph-deploy purge' + " " + all_nodes
    purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes
    mon_hostname = mon_nodes.split(' ')[0]
    mon_hostname = str(mon_hostname)
    gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
    deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
    no_of_osds = 0

    if mon_nodes is None:
        raise RuntimeError("no monitor nodes in the config file")

    estatus_new = execute_ceph_deploy(ctx, config, new_mon)
    if estatus_new != 0:
        raise RuntimeError("ceph-deploy: new command failed")

    log.info('adding config inputs...')
    testdir = teuthology.get_testdir(ctx)
    conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
    first_mon = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(first_mon).remotes.keys()

    lines = None
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.iteritems():
            lines = '[{section}]\n'.format(section=section)
            teuthology.append_lines_to_file(remote, conf_path, lines,
                                            sudo=True)
            for key, value in keys.iteritems():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                teuthology.append_lines_to_file(remote, conf_path, lines,
                                                sudo=True)

    estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
    if estatus_install != 0:
        raise RuntimeError("ceph-deploy: Failed to install ceph")

    mon_no = None
    mon_no = config.get('mon_initial_members')
    if mon_no is not None:
        i = 0
        mon1 = []
        while(i < mon_no):
            mon1.append(mon_node[i])
            i = i + 1
        initial_mons = " ".join(mon1)
        for k in range(mon_no, len(mon_node)):
            mon_create_nodes = './ceph-deploy mon create' + " " + \
                initial_mons + " " + mon_node[k]
            estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
            if estatus_mon != 0:
                raise RuntimeError("ceph-deploy: Failed to create monitor")
    else:
        mon_create_nodes = './ceph-deploy mon create' + " " + mon_nodes
        estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)
        if estatus_mon != 0:
            raise RuntimeError("ceph-deploy: Failed to create monitors")

    estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
    while (estatus_gather != 0):
        # mon_create_nodes = './ceph-deploy mon create'+" "+mon_node[0]
        # execute_ceph_deploy(ctx, config, mon_create_nodes)
        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)

    if mds_nodes:
        estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
        if estatus_mds != 0:
            raise RuntimeError("ceph-deploy: Failed to deploy mds")

    if config.get('test_mon_destroy') is not None:
        for d in range(1, len(mon_node)):
            mon_destroy_nodes = './ceph-deploy mon destroy' + " " + mon_node[d]
            estatus_mon_d = execute_ceph_deploy(ctx, config, mon_destroy_nodes)
            if estatus_mon_d != 0:
                raise RuntimeError("ceph-deploy: Failed to delete monitor")

    node_dev_list = get_dev_for_osd(ctx, config)
    for d in node_dev_list:
        osd_create_cmds = './ceph-deploy osd create --zap-disk' + " " + d
        estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
        if estatus_osd == 0:
            log.info('successfully created osd')
            no_of_osds += 1
        else:
            zap_disk = './ceph-deploy disk zap' + " " + d
            execute_ceph_deploy(ctx, config, zap_disk)
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmds)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

    if config.get('wait-for-healthy', True) and no_of_osds >= 2:
        is_healthy(ctx=ctx, config=None)

        log.info('Setting up client nodes...')
        conf_path = '/etc/ceph/ceph.conf'
        admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = teuthology.get_file(
            remote=mon0_remote,
            path=conf_path,
            sudo=True,
        )
        admin_keyring = teuthology.get_file(
            remote=mon0_remote,
            path=admin_keyring_path,
            sudo=True,
        )

        clients = ctx.cluster.only(teuthology.is_type('client'))
        for remot, roles_for_host in clients.remotes.iteritems():
            for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                client_keyring = '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                mon0_remote.run(
                    args=[
                        'cd', '{tdir}'.format(tdir=testdir), run.Raw('&&'),
                        'sudo', 'bash', '-c', run.Raw('"'),
                        'ceph', 'auth', 'get-or-create',
                        'client.{id}'.format(id=id_),
                        'mds', 'allow', 'mon', 'allow *', 'osd', 'allow *',
                        run.Raw('>'), client_keyring, run.Raw('"'),
                    ],
                )
                key_data = teuthology.get_file(
                    remote=mon0_remote,
                    path=client_keyring,
                    sudo=True,
                )
                teuthology.sudo_write_file(
                    remote=remot, path=client_keyring,
                    data=key_data, perms='0644')
                teuthology.sudo_write_file(
                    remote=remot, path=admin_keyring_path,
                    data=admin_keyring, perms='0644')
                teuthology.sudo_write_file(
                    remote=remot, path=conf_path,
                    data=conf_data, perms='0644')
    else:
        raise RuntimeError("The cluster is NOT operational due to insufficient OSDs")

    try:
        yield
    finally:
        log.info('Stopping ceph...')
        ctx.cluster.run(args=[
            'sudo', 'stop', 'ceph-all', run.Raw('||'),
            'sudo', 'service', 'ceph', 'stop'
        ])

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo', 'find', '/var/log/ceph', '-name', '*.log',
                        '-print0', run.Raw('|'), 'sudo', 'xargs', '-0',
                        '--no-run-if-empty', '--', 'gzip', '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        log.info('Purging package...')
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(ctx, config, purgedata_nodes)
def cli_test(ctx, config):
    """
    Exercise the most commonly used ceph-deploy CLI commands, ensure they all
    work, and start up the init system.
    """
    log.info('Ceph-deploy Test')
    if config is None:
        config = {}
    test_branch = ''
    conf_dir = teuthology.get_testdir(ctx) + "/cdtest"

    def execute_cdeploy(admin, cmd, path):
        """Execute ceph-deploy commands, using either the git path or the repo path."""
        args = ['cd', conf_dir, run.Raw(';')]
        if path:
            args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
        else:
            args.append('ceph-deploy')
        args.append(run.Raw(cmd))
        ec = admin.run(args=args, check_status=False).exitstatus
        if ec != 0:
            raise RuntimeError(
                "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))

    if config.get('rhbuild'):
        path = None
    else:
        path = teuthology.get_testdir(ctx)
        # test on branch from config eg: wip-* , master or next etc
        # packages for all distro's should exist for wip*
        if ctx.config.get('branch'):
            branch = ctx.config.get('branch')
            test_branch = ' --dev={branch} '.format(branch=branch)
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    for node, role in mons.remotes.iteritems():
        admin = node
        admin.run(args=['mkdir', conf_dir], check_status=False)
        nodename = admin.shortname
    system_type = teuthology.get_system_type(admin)
    if config.get('rhbuild'):
        admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
    log.info('system type is %s', system_type)
    osds = ctx.cluster.only(teuthology.is_type('osd'))

    for remote, roles in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        log.info("roles %s", roles)
        if (len(devs) < 3):
            log.error(
                'Test needs minimum of 3 devices, only found %s',
                str(devs))
            raise RuntimeError("Needs minimum of 3 devices ")

    conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
    new_cmd = 'new ' + nodename
    execute_cdeploy(admin, new_cmd, path)
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.iteritems():
            lines = '[{section}]\n'.format(section=section)
            teuthology.append_lines_to_file(admin, conf_path, lines, sudo=True)
            for key, value in keys.iteritems():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                teuthology.append_lines_to_file(admin, conf_path, lines,
                                                sudo=True)
    new_mon_install = 'install {branch} --mon '.format(
        branch=test_branch) + nodename
    new_osd_install = 'install {branch} --osd '.format(
        branch=test_branch) + nodename
    new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
    create_initial = 'mon create-initial '
    # either use create-keys or push command
    push_keys = 'admin ' + nodename
    execute_cdeploy(admin, new_mon_install, path)
    execute_cdeploy(admin, new_osd_install, path)
    execute_cdeploy(admin, new_admin, path)
    execute_cdeploy(admin, create_initial, path)
    execute_cdeploy(admin, push_keys, path)

    for i in range(3):
        zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
        prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
        execute_cdeploy(admin, zap_disk, path)
        execute_cdeploy(admin, prepare, path)

    log.info("list files for debugging purpose to check file permissions")
    admin.run(args=['ls', run.Raw('-lt'), conf_dir])
    remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
    r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
    out = r.stdout.getvalue()
    log.info('Ceph health: %s', out.rstrip('\n'))
    log.info("Waiting for cluster to become healthy")
    with contextutil.safe_while(sleep=10, tries=6,
                                action='check health') as proceed:
        while proceed():
            r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
            out = r.stdout.getvalue()
            if (out.split(None, 1)[0] == 'HEALTH_OK'):
                break
    rgw_install = 'install {branch} --rgw {node}'.format(
        branch=test_branch,
        node=nodename,
    )
    rgw_create = 'rgw create ' + nodename
    execute_cdeploy(admin, rgw_install, path)
    execute_cdeploy(admin, rgw_create, path)
    log.info('All ceph-deploy cli tests passed')
    try:
        yield
    finally:
        log.info("cleaning up")
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
                              'sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)
        for i in range(3):
            umount_dev = "{d}1".format(d=devs[i])
            r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
        cmd = 'purge ' + nodename
        execute_cdeploy(admin, cmd, path)
        cmd = 'purgedata ' + nodename
        execute_cdeploy(admin, cmd, path)
        log.info("Removing temporary dir")
        admin.run(
            args=['rm', run.Raw('-rf'), run.Raw(conf_dir)],
            check_status=False)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the
    # download task puts it.  Remember this here, because subsequently IDs
    # will change from those in the test config to those that ceph-deploy
    # invents.
    (ceph_admin,) = ctx.cluster.only(
        teuthology.get_first_mon(ctx, config)).remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=['cd', '{tdir}/ceph-deploy'.format(tdir=testdir),
                  run.Raw('&&'), run.Raw(cmd)],
            check_status=False,
        ).exitstatus

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)
        execute_ceph_deploy(mgr_create)

        # create-keys is explicit now
        # http://tracker.ceph.com/issues/16036
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.iterkeys():
            remote.run(args=['sudo', 'ceph-create-keys', '--cluster', 'ceph',
                             '--id', remote.shortname])

        estatus_gather = execute_ceph_deploy(gather_keys)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(remote=mon0_remote,
                                            path=conf_path, sudo=True)
            admin_keyring = teuthology.get_file(remote=mon0_remote,
                                                path=admin_keyring_path,
                                                sudo=True)

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=['cd', '{tdir}'.format(tdir=testdir),
                              run.Raw('&&'),
                              'sudo', 'bash', '-c',
                              run.Raw('"'), 'ceph',
                              'auth', 'get-or-create',
                              'client.{id}'.format(id=id_),
                              'mds', 'allow',
                              'mon', 'allow *',
                              'osd', 'allow *',
                              run.Raw('>'), client_keyring,
                              run.Raw('"')],
                    )
                    key_data = teuthology.get_file(remote=mon0_remote,
                                                   path=client_keyring,
                                                   sudo=True)
                    teuthology.sudo_write_file(remote=remot,
                                               path=client_keyring,
                                               data=key_data, perms='0644')
                    teuthology.sudo_write_file(remote=remot,
                                               path=admin_keyring_path,
                                               data=admin_keyring,
                                               perms='0644')
                    teuthology.sudo_write_file(remote=remot,
                                               path=conf_path,
                                               data=conf_data, perms='0644')

            if mds_nodes:
                log.info('Configuring CephFS...')
                ceph_fs = Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    except Exception:
        log.info("Error encountered, logging exception before tearing down "
                 "ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
                              'sudo', 'systemctl', 'stop', 'ceph.target'])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=['sudo', 'status', 'ceph-all', run.Raw('||'),
                  'sudo', 'service', 'ceph', 'status', run.Raw('||'),
                  'sudo', 'systemctl', 'status', 'ceph.target'],
            check_status=False)

        # and now just check for the processes themselves, as if
        # upstart/sysvinit is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote, '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=['sudo', 'find', '/var/log/ceph', '-name', '*.log',
                          '-print0', run.Raw('|'), 'sudo', 'xargs', '-0',
                          '--no-run-if-empty', '--', 'gzip', '--'],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""
    try:
        log.info("Building ceph cluster using ceph-deploy...")
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get("branch") is not None:
            cbranch = config.get("branch")
            for var, val in cbranch.iteritems():
                if var == "testing":
                    ceph_branch = "--{var}".format(var=var)
                ceph_branch = "--{var}={val}".format(var=var, val=val)
        node_dev_list = []
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_roles(ctx, config, "mds")
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_roles(ctx, config, "mon")
        mon_nodes = " ".join(mon_node)
        new_mon = "./ceph-deploy new" + " " + mon_nodes
        install_nodes = "./ceph-deploy install " + ceph_branch + " " + all_nodes
        purge_nodes = "./ceph-deploy purge" + " " + all_nodes
        purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes
        mon_hostname = mon_nodes.split(" ")[0]
        mon_hostname = str(mon_hostname)
        gather_keys = "./ceph-deploy gatherkeys" + " " + mon_hostname
        deploy_mds = "./ceph-deploy mds create" + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(ctx, config, new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info("adding config inputs...")
        testdir = teuthology.get_testdir(ctx)
        conf_path = "{tdir}/ceph-deploy/ceph.conf".format(tdir=testdir)
        first_mon = teuthology.get_first_mon(ctx, config)
        (remote,) = ctx.cluster.only(first_mon).remotes.keys()

        lines = None
        if config.get("conf") is not None:
            confp = config.get("conf")
            for section, keys in confp.iteritems():
                lines = "[{section}]\n".format(section=section)
                teuthology.append_lines_to_file(remote, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = "{key} = {value}\n".format(key=key, value=value)
                    teuthology.append_lines_to_file(remote, conf_path, lines,
                                                    sudo=True)

        estatus_install = execute_ceph_deploy(ctx, config, install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")

        mon_create_nodes = "./ceph-deploy mon create-initial"
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        estatus_mon = execute_ceph_deploy(ctx, config, mon_create_nodes)

        estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while estatus_gather != 0:
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = "ceph-deploy was not able to gatherkeys after 15 minutes"
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(ctx, config, gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(ctx, config, deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get("test_mon_destroy") is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = "./ceph-deploy mon destroy" + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(ctx, config,
                                                    mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        osd_create_cmd = "./ceph-deploy osd create --zap-disk "
        for d in node_dev_list:
            if config.get("dmcrypt") is not None:
                osd_create_cmd_d = osd_create_cmd + "--dmcrypt" + " " + d
            else:
                osd_create_cmd_d = osd_create_cmd + d
            estatus_osd = execute_ceph_deploy(ctx, config, osd_create_cmd_d)
            if estatus_osd == 0:
                log.info("successfully created osd")
                no_of_osds += 1
            else:
                disks = []
                disks = d.split(":")
                dev_disk = disks[0] + ":" + disks[1]
                j_disk = disks[0] + ":" + disks[2]
                zap_disk = "./ceph-deploy disk zap " + dev_disk + " " + j_disk
                execute_ceph_deploy(ctx, config, zap_disk)
                estatus_osd = execute_ceph_deploy(ctx, config,
                                                  osd_create_cmd_d)
                if estatus_osd == 0:
                    log.info("successfully created osd")
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get("wait-for-healthy", True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info("Setting up client nodes...")
            conf_path = "/etc/ceph/ceph.conf"
            admin_keyring_path = "/etc/ceph/ceph.client.admin.keyring"
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(remote=mon0_remote,
                                            path=conf_path, sudo=True)
            admin_keyring = teuthology.get_file(remote=mon0_remote,
                                                path=admin_keyring_path,
                                                sudo=True)

            clients = ctx.cluster.only(teuthology.is_type("client"))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, "client"):
                    client_keyring = \
                        "/etc/ceph/ceph.client.{id}.keyring".format(id=id_)
                    mon0_remote.run(
                        args=["cd", "{tdir}".format(tdir=testdir),
                              run.Raw("&&"),
                              "sudo", "bash", "-c",
                              run.Raw('"'), "ceph",
                              "auth", "get-or-create",
                              "client.{id}".format(id=id_),
                              "mds", "allow",
                              "mon", "allow *",
                              "osd", "allow *",
                              run.Raw(">"), client_keyring,
                              run.Raw('"')],
                    )
                    key_data = teuthology.get_file(remote=mon0_remote,
                                                   path=client_keyring,
                                                   sudo=True)
                    teuthology.sudo_write_file(remote=remot,
                                               path=client_keyring,
                                               data=key_data, perms="0644")
                    teuthology.sudo_write_file(remote=remot,
                                               path=admin_keyring_path,
                                               data=admin_keyring,
                                               perms="0644")
                    teuthology.sudo_write_file(remote=remot,
                                               path=conf_path,
                                               data=conf_data, perms="0644")
        else:
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    finally:
        log.info("Stopping ceph...")
        ctx.cluster.run(args=["sudo", "stop", "ceph-all", run.Raw("||"),
                              "sudo", "service", "ceph", "stop"])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(args=["sudo", "status", "ceph-all", run.Raw("||"),
                              "sudo", "service", "ceph", "status"],
                        check_status=False)

        # and now just check for the processes themselves, as if
        # upstart/sysvinit is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=["sudo", "ps", "aux", run.Raw("|"),
                              "grep", "-v", "grep", run.Raw("|"),
                              "grep", "ceph"], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info("Archiving mon data...")
            path = os.path.join(ctx.archive, "data")
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type("mon"))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith("mon."):
                        teuthology.pull_directory_tarball(
                            remote, "/var/lib/ceph/mon",
                            path + "/" + role + ".tgz")

            log.info("Compressing logs...")
            run.wait(
                ctx.cluster.run(
                    args=["sudo", "find", "/var/log/ceph", "-name", "*.log",
                          "-print0", run.Raw("|"), "sudo", "xargs", "-0",
                          "--no-run-if-empty", "--", "gzip", "--"],
                    wait=False,
                )
            )

            log.info("Archiving logs...")
            path = os.path.join(ctx.archive, "remote")
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, "/var/log/ceph",
                                          os.path.join(sub, "log"))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = "./ceph-deploy purge" + " " + all_nodes
        purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes

        log.info("Purging package...")
        execute_ceph_deploy(ctx, config, purge_nodes)
        log.info("Purging data...")
        execute_ceph_deploy(ctx, config, purgedata_nodes)
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the
    # download task puts it.  Remember this here, because subsequently IDs
    # will change from those in the test config to those that ceph-deploy
    # invents.
    (ceph_admin,) = ctx.cluster.only(
        teuthology.get_first_mon(ctx, config)).remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=["cd", "{tdir}/ceph-deploy".format(tdir=testdir),
                  run.Raw("&&"), run.Raw(cmd)],
            check_status=False,
        ).exitstatus

    try:
        log.info("Building ceph cluster using ceph-deploy...")
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get("branch") is not None:
            cbranch = config.get("branch")
            for var, val in cbranch.iteritems():
                ceph_branch = "--{var}={val}".format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, "mds")
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, "mon")
        mon_nodes = " ".join(mon_node)
        new_mon = "./ceph-deploy new" + " " + mon_nodes
        mon_hostname = mon_nodes.split(" ")[0]
        mon_hostname = str(mon_hostname)
        gather_keys = "./ceph-deploy gatherkeys" + " " + mon_hostname
        deploy_mds = "./ceph-deploy mds create" + " " + mds_nodes
        no_of_osds = 0

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info("adding config inputs...")
        testdir = teuthology.get_testdir(ctx)
        conf_path = "{tdir}/ceph-deploy/ceph.conf".format(tdir=testdir)

        if config.get("conf") is not None:
            confp = config.get("conf")
            for section, keys in confp.iteritems():
                lines = "[{section}]\n".format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = "{key} = {value}\n".format(key=key, value=value)
                    teuthology.append_lines_to_file(ceph_admin, conf_path,
                                                    lines, sudo=True)

        # install ceph
        install_nodes = "./ceph-deploy install " + \
            (ceph_branch if ceph_branch else "--dev=master") + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = "./ceph-deploy install --tests " + \
            (ceph_branch if ceph_branch else "--dev=master") + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = "./ceph-deploy mon create-initial"
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        estatus_gather = execute_ceph_deploy(gather_keys)
        max_gather_tries = 90
        gather_tries = 0
        while estatus_gather != 0:
            gather_tries += 1
            if gather_tries >= max_gather_tries:
                msg = "ceph-deploy was not able to gatherkeys after 15 minutes"
                raise RuntimeError(msg)
            estatus_gather = execute_ceph_deploy(gather_keys)
            time.sleep(10)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get("test_mon_destroy") is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = "./ceph-deploy mon destroy" + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = "./ceph-deploy disk zap " + node + ":" + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = "./ceph-deploy osd create "
            if config.get("dmcrypt") is not None:
                osd_create_cmd += "--dmcrypt "
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info("successfully created osd")
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get("wait-for-healthy", True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info("Setting up client nodes...")
            conf_path = "/etc/ceph/ceph.conf"
            admin_keyring_path = "/etc/ceph/ceph.client.admin.keyring"
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(remote=mon0_remote,
                                            path=conf_path, sudo=True)
            admin_keyring = teuthology.get_file(remote=mon0_remote,
                                                path=admin_keyring_path,
                                                sudo=True)

            clients = ctx.cluster.only(teuthology.is_type("client"))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, "client"):
                    client_keyring = \
                        "/etc/ceph/ceph.client.{id}.keyring".format(id=id_)
                    mon0_remote.run(
                        args=["cd", "{tdir}".format(tdir=testdir),
                              run.Raw("&&"),
                              "sudo", "bash", "-c",
                              run.Raw('"'), "ceph",
                              "auth", "get-or-create",
                              "client.{id}".format(id=id_),
                              "mds", "allow",
                              "mon", "allow *",
                              "osd", "allow *",
                              run.Raw(">"), client_keyring,
                              run.Raw('"')],
                    )
                    key_data = teuthology.get_file(remote=mon0_remote,
                                                   path=client_keyring,
                                                   sudo=True)
                    teuthology.sudo_write_file(remote=remot,
                                               path=client_keyring,
                                               data=key_data, perms="0644")
                    teuthology.sudo_write_file(remote=remot,
                                               path=admin_keyring_path,
                                               data=admin_keyring,
                                               perms="0644")
                    teuthology.sudo_write_file(remote=remot,
                                               path=conf_path,
                                               data=conf_data, perms="0644")

            if mds_nodes:
                log.info("Configuring CephFS...")
                ceph_fs = Filesystem(ctx,
                                     admin_remote=clients.remotes.keys()[0])
                if not ceph_fs.legacy_configured():
                    ceph_fs.create()
        elif not config.get("only_mon"):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    except Exception:
        log.info("Error encountered, logging exception before tearing down "
                 "ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get("keep_running"):
            return
        log.info("Stopping ceph...")
        ctx.cluster.run(args=["sudo", "stop", "ceph-all", run.Raw("||"),
                              "sudo", "service", "ceph", "stop", run.Raw("||"),
                              "sudo", "systemctl", "stop", "ceph.target"])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=["sudo", "status", "ceph-all", run.Raw("||"),
                  "sudo", "service", "ceph", "status", run.Raw("||"),
                  "sudo", "systemctl", "status", "ceph.target"],
            check_status=False)

        # and now just check for the processes themselves, as if
        # upstart/sysvinit is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=["sudo", "ps", "aux", run.Raw("|"),
                              "grep", "-v", "grep", run.Raw("|"),
                              "grep", "ceph"], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info("Archiving mon data...")
            path = os.path.join(ctx.archive, "data")
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type("mon"))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith("mon."):
                        teuthology.pull_directory_tarball(
                            remote, "/var/lib/ceph/mon",
                            path + "/" + role + ".tgz")

            log.info("Compressing logs...")
            run.wait(
                ctx.cluster.run(
                    args=["sudo", "find", "/var/log/ceph", "-name", "*.log",
                          "-print0", run.Raw("|"), "sudo", "xargs", "-0",
                          "--no-run-if-empty", "--", "gzip", "--"],
                    wait=False,
                )
            )

            log.info("Archiving logs...")
            path = os.path.join(ctx.archive, "remote")
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, "/var/log/ceph",
                                          os.path.join(sub, "log"))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = "./ceph-deploy purge" + " " + all_nodes
        purgedata_nodes = "./ceph-deploy purgedata" + " " + all_nodes

        log.info("Purging package...")
        execute_ceph_deploy(purge_nodes)
        log.info("Purging data...")
        execute_ceph_deploy(purgedata_nodes)