def begin(self):
    super(CephFSMirror, self).begin()
    testdir = misc.get_testdir(self.ctx)

    args = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'daemon-helper', 'term',
    ]
    if 'valgrind' in self.config:
        args = misc.get_valgrind_args(
            testdir, 'cephfs-mirror-{id}'.format(id=self.client),
            args, self.config.get('valgrind'))
    args.extend([
        'cephfs-mirror',
        '--cluster', self.cluster_name,
        '--id', self.client_id,
    ])

    self.ctx.daemons.add_daemon(
        self.remote, 'cephfs-mirror', self.client,
        args=args,
        logger=self.log.getChild(self.client),
        stdin=run.PIPE,
        wait=False,
    )
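For readers unfamiliar with the valgrind plumbing above: misc.get_valgrind_args() returns a new argument list with a valgrind invocation appended, so the daemon command extended onto it afterwards ends up running under valgrind. The sketch below is a simplified, hypothetical stand-in -- the helper name, the log path, and the flag handling are assumptions for illustration, not the real teuthology implementation:

# Illustrative only -- not the actual teuthology helper.
def get_valgrind_args_sketch(testdir, name, base_args, valgrind_opts):
    # With valgrind disabled the command prefix is returned unchanged.
    if not valgrind_opts:
        return list(base_args)
    log_path = '{tdir}/archive/log/valgrind/{name}.log'.format(
        tdir=testdir, name=name)
    # Append valgrind so that whatever the caller extend()s next
    # (e.g. ['cephfs-mirror', '--cluster', ...]) runs under it.
    return list(base_args) + ['valgrind', '--log-file=' + log_path] + list(valgrind_opts)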
def _run_one_client(ctx, config, role):
    """Spawned task that runs the client"""
    krbd = config.get('krbd', False)
    nbd = config.get('nbd', False)
    testdir = teuthology.get_testdir(ctx)
    (remote,) = ctx.cluster.only(role).remotes.iterkeys()

    args = []
    if krbd or nbd:
        args.append('sudo')  # rbd(-nbd) map/unmap need privileges
    args.extend([
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir)
    ])

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('rbd_fsx', {}))

    if config.get('valgrind'):
        args = teuthology.get_valgrind_args(
            testdir,
            'fsx_{id}'.format(id=role),
            args,
            config.get('valgrind')
        )

    args.extend([
        'ceph_test_librbd_fsx',
        '-d',        # debug output for all operations
        '-W', '-R',  # mmap doesn't work with rbd
        '-p', str(config.get('progress_interval', 100)),  # show progress
        '-P', '{tdir}/archive'.format(tdir=testdir),
        '-r', str(config.get('readbdy', 1)),
        '-w', str(config.get('writebdy', 1)),
        '-t', str(config.get('truncbdy', 1)),
        '-h', str(config.get('holebdy', 1)),
        '-l', str(config.get('size', 250000000)),
        '-S', str(config.get('seed', 0)),
        '-N', str(config.get('ops', 1000)),
    ])
    if krbd:
        args.append('-K')  # -K enables krbd mode
    if nbd:
        args.append('-M')  # -M enables nbd mode
    if config.get('direct_io', False):
        args.append('-Z')  # -Z use direct IO
    if not config.get('randomized_striping', True):
        args.append('-U')  # -U disables randomized striping
    if not config.get('punch_holes', True):
        args.append('-H')  # -H disables discard ops
    if config.get('journal_replay', False):
        args.append('-j')  # -j replay all IO events from journal
    args.extend([
        'pool_{pool}'.format(pool=role),
        'image_{image}'.format(image=role),
    ])

    remote.run(args=args)
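As a usage illustration (the role and config values below are made up, not taken from a real run): for role='client.0' with config={'krbd': True, 'ops': 5000} and all other options left at their defaults, the function above assembles roughly the following command on the remote:

# Hypothetical result for role='client.0', config={'krbd': True, 'ops': 5000}:
#   sudo adjust-ulimits ceph-coverage {tdir}/archive/coverage \
#       ceph_test_librbd_fsx -d -W -R -p 100 -P {tdir}/archive \
#       -r 1 -w 1 -t 1 -h 1 -l 250000000 -S 0 -N 5000 -K \
#       pool_client.0 image_client.0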
def _run_one_client(ctx, config, role): """Spawned task that runs the client""" krbd = config.get('krbd', False) testdir = teuthology.get_testdir(ctx) (remote,) = ctx.cluster.only(role).remotes.iterkeys() args = [] if krbd: args.append('sudo') # rbd map/unmap need privileges args.extend([ 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir) ]) overrides = ctx.config.get('overrides', {}) teuthology.deep_merge(config, overrides.get('rbd_fsx', {})) if config.get('valgrind'): args = teuthology.get_valgrind_args( testdir, 'fsx_{id}'.format(id=role), args, config.get('valgrind') ) args.extend([ 'ceph_test_librbd_fsx', '-d', # debug output for all operations '-W', '-R', # mmap doesn't work with rbd '-p', str(config.get('progress_interval', 100)), # show progress '-P', '{tdir}/archive'.format(tdir=testdir), '-r', str(config.get('readbdy',1)), '-w', str(config.get('writebdy',1)), '-t', str(config.get('truncbdy',1)), '-h', str(config.get('holebdy',1)), '-l', str(config.get('size', 250000000)), '-S', str(config.get('seed', 0)), '-N', str(config.get('ops', 1000)), ]) if krbd: args.append('-K') # -K enables krbd mode if config.get('direct_io', False): args.append('-Z') # -Z use direct IO if not config.get('randomized_striping', True): args.append('-U') # -U disables randomized striping if not config.get('punch_holes', True): args.append('-H') # -H disables discard ops if config.get('journal_replay', False): args.append('-j') # -j replay all IO events from journal args.extend([ 'pool_{pool}'.format(pool=role), 'image_{image}'.format(image=role), ]) remote.run(args=args)
def start_rgw(ctx, config): log.info('Starting rgw...') rgws = {} for client in config.iterkeys(): (remote,) = ctx.cluster.only(client).remotes.iterkeys() client_config = config.get(client) if client_config is None: client_config = {} log.info("rgw %s config is %s", client, client_config) run_cmd=[ 'LD_LIBRARY_PATH=/tmp/cephtest/binary/usr/local/lib', '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', '/tmp/cephtest/archive/coverage', '/tmp/cephtest/daemon-helper', 'term', ] run_cmd_tail=[ '/tmp/cephtest/binary/usr/local/bin/radosgw', '-c', '/tmp/cephtest/ceph.conf', '--log-file', '/tmp/cephtest/archive/log/rgw.log', '/tmp/cephtest/apache/apache.conf', '--foreground', run.Raw('>'), '/tmp/cephtest/archive/log/rgw.stdout', run.Raw('2>&1'), ] run_cmd.extend( teuthology.get_valgrind_args( client, client_config.get('valgrind') ) ) run_cmd.extend(run_cmd_tail) proc = remote.run( args=run_cmd, logger=log.getChild(client), stdin=run.PIPE, wait=False, ) rgws[client] = proc try: yield finally: log.info('Stopping rgw...') for client, proc in rgws.iteritems(): proc.stdin.close() run.wait(rgws.itervalues())
def mount(self): log.info("Client client.%s config is %s" % (self.client_id, self.client_config)) daemon_signal = 'kill' if self.client_config.get('coverage') or self.client_config.get( 'valgrind') is not None: daemon_signal = 'term' mnt = os.path.join(self.test_dir, 'mnt.{id}'.format(id=self.client_id)) log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format( id=self.client_id, remote=self.client_remote, mnt=mnt)) self.client_remote.run(args=[ 'mkdir', '--', mnt, ], ) run_cmd = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=self.test_dir), 'daemon-helper', daemon_signal, ] run_cmd_tail = [ 'ceph-fuse', '-f', '--name', 'client.{id}'.format(id=self.client_id), # TODO ceph-fuse doesn't understand dash dash '--', mnt, ] if self.client_config.get('valgrind') is not None: run_cmd = misc.get_valgrind_args( self.test_dir, 'client.{id}'.format(id=self.client_id), run_cmd, self.client_config.get('valgrind'), ) run_cmd.extend(run_cmd_tail) proc = self.client_remote.run( args=run_cmd, logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)), stdin=run.PIPE, wait=False, ) self.fuse_daemon = proc
def mount(self): log.info("Client client.%s config is %s" % (self.client_id, self.client_config)) daemon_signal = 'kill' if self.client_config.get('coverage') or self.client_config.get('valgrind') is not None: daemon_signal = 'term' mnt = os.path.join(self.test_dir, 'mnt.{id}'.format(id=self.client_id)) log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format( id=self.client_id, remote=self.client_remote, mnt=mnt)) self.client_remote.run( args=[ 'mkdir', '--', mnt, ], ) run_cmd = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=self.test_dir), 'daemon-helper', daemon_signal, ] run_cmd_tail = [ 'ceph-fuse', '-f', '--name', 'client.{id}'.format(id=self.client_id), # TODO ceph-fuse doesn't understand dash dash '--', mnt, ] if self.client_config.get('valgrind') is not None: run_cmd = misc.get_valgrind_args( self.test_dir, 'client.{id}'.format(id=self.client_id), run_cmd, self.client_config.get('valgrind'), ) run_cmd.extend(run_cmd_tail) proc = self.client_remote.run( args=run_cmd, logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)), stdin=run.PIPE, wait=False, ) self.fuse_daemon = proc
def begin(self):
    super(RBDMirror, self).begin()
    testdir = misc.get_testdir(self.ctx)
    daemon_signal = 'kill'
    if 'coverage' in self.config or 'valgrind' in self.config or \
            self.config.get('thrash', False):
        daemon_signal = 'term'

    args = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'daemon-helper', daemon_signal,
    ]
    if 'valgrind' in self.config:
        args = misc.get_valgrind_args(
            testdir,
            'rbd-mirror-{id}'.format(id=self.client),
            args,
            self.config.get('valgrind')
        )
    args.extend([
        'rbd-mirror', '--foreground',
        '--cluster', self.cluster_name,
        '--id', self.client_id,
    ])

    self.ctx.daemons.add_daemon(
        self.remote, 'rbd-mirror', self.client, cluster=self.cluster_name,
        args=args,
        logger=self.log.getChild(self.client),
        stdin=run.PIPE,
        wait=False,
    )
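The signal choice above follows a pattern that recurs throughout these helpers: the daemon is run under `daemon-helper term` (SIGTERM, graceful shutdown) when coverage, valgrind, or thrashing is configured, so reports can be flushed, and under `daemon-helper kill` otherwise. A minimal sketch of that decision factored out for clarity; the helper name is hypothetical:

def choose_daemon_signal(config):
    # 'term' lets the daemon exit cleanly so coverage/valgrind output is
    # written; plain test runs can simply be killed.
    if 'coverage' in config or 'valgrind' in config or config.get('thrash', False):
        return 'term'
    return 'kill'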
def start_rgw(ctx, config, on_client = None, except_client = None): """ Start rgw on remote sites. """ log.info('Starting rgw...') log.debug('client %r', on_client) clients_to_run = [on_client] if on_client is None: clients_to_run = config.keys() log.debug('client %r', clients_to_run) testdir = teuthology.get_testdir(ctx) for client in clients_to_run: if client == except_client: continue (remote,) = ctx.cluster.only(client).remotes.iterkeys() cluster_name, daemon_type, client_id = teuthology.split_role(client) client_with_id = daemon_type + '.' + client_id client_with_cluster = cluster_name + '.' + client_with_id zone = rgw_utils.zone_for_client(ctx, client) log.debug('zone %s', zone) client_config = config.get(client) if client_config is None: client_config = {} log.info("rgw %s config is %s", client, client_config) cmd_prefix = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'daemon-helper', 'term', ] rgw_cmd = ['radosgw'] if ctx.rgw.frontend == 'apache': if ctx.rgw.use_fastcgi or _use_uds_with_fcgi(remote): rgw_cmd.extend([ '--rgw-socket-path', '{tdir}/apache/tmp.{client_with_cluster}/fastcgi_sock/rgw_sock'.format( tdir=testdir, client_with_cluster=client_with_cluster, ), '--rgw-frontends', 'fastcgi', ]) else: # for mod_proxy_fcgi, using tcp rgw_cmd.extend([ '--rgw-socket-path', '', '--rgw-print-continue', 'false', '--rgw-frontends', 'fastcgi socket_port=9000 socket_host=0.0.0.0', ]) elif ctx.rgw.frontend == 'civetweb': host, port = ctx.rgw.role_endpoints[client] rgw_cmd.extend([ '--rgw-frontends', 'civetweb port={port}'.format(port=port), ]) if zone is not None: rgw_cmd.extend(['--rgw-zone', zone]) rgw_cmd.extend([ '-n', client_with_id, '--cluster', cluster_name, '-k', '/etc/ceph/{client_with_cluster}.keyring'.format(client_with_cluster=client_with_cluster), '--log-file', '/var/log/ceph/rgw.{client_with_cluster}.log'.format(client_with_cluster=client_with_cluster), '--rgw_ops_log_socket_path', '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format(tdir=testdir, client_with_cluster=client_with_cluster), '--foreground', run.Raw('|'), 'sudo', 'tee', '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format(tdir=testdir, client_with_cluster=client_with_cluster), run.Raw('2>&1'), ]) if client_config.get('valgrind'): cmd_prefix = teuthology.get_valgrind_args( testdir, client_with_cluster, cmd_prefix, client_config.get('valgrind') ) run_cmd = list(cmd_prefix) run_cmd.extend(rgw_cmd) ctx.daemons.add_daemon( remote, 'rgw', client_with_id, cluster=cluster_name, args=run_cmd, logger=log.getChild(client), stdin=run.PIPE, wait=False, ) # XXX: add_daemon() doesn't let us wait until radosgw finishes startup for client in clients_to_run: if client == except_client: continue host, port = ctx.rgw.role_endpoints[client] endpoint = 'http://{host}:{port}/'.format(host=host, port=port) log.info('Polling {client} until it starts accepting connections on {endpoint}'.format(client=client, endpoint=endpoint)) wait_for_radosgw(endpoint) try: yield finally: for client in config.iterkeys(): cluster_name, daemon_type, client_id = teuthology.split_role(client) client_with_id = daemon_type + '.' + client_id client_with_cluster = cluster_name + '.' + client_with_id ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).stop() ctx.cluster.only(client).run( args=[ 'rm', '-f', '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, client=client_with_cluster), ], )
def _mount(self, mount_path, mount_fs_name): log.info("Client client.%s config is %s" % (self.client_id, self.client_config)) daemon_signal = 'kill' if self.client_config.get('coverage') or self.client_config.get('valgrind') is not None: daemon_signal = 'term' log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format( id=self.client_id, remote=self.client_remote, mnt=self.mountpoint)) self.client_remote.run( args=[ 'mkdir', '--', self.mountpoint, ], ) run_cmd = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=self.test_dir), 'daemon-helper', daemon_signal, ] fuse_cmd = ['ceph-fuse', "-f"] if mount_path is not None: fuse_cmd += ["--client_mountpoint={0}".format(mount_path)] if mount_fs_name is not None: fuse_cmd += ["--client_mds_namespace={0}".format(mount_fs_name)] fuse_cmd += [ '--name', 'client.{id}'.format(id=self.client_id), # TODO ceph-fuse doesn't understand dash dash '--', self.mountpoint, ] if self.client_config.get('valgrind') is not None: run_cmd = misc.get_valgrind_args( self.test_dir, 'client.{id}'.format(id=self.client_id), run_cmd, self.client_config.get('valgrind'), ) run_cmd.extend(fuse_cmd) def list_connections(): self.client_remote.run( args=["sudo", "mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"], check_status=False ) p = self.client_remote.run( args=["ls", "/sys/fs/fuse/connections"], stdout=StringIO(), check_status=False ) if p.exitstatus != 0: return [] ls_str = p.stdout.getvalue().strip() if ls_str: return [int(n) for n in ls_str.split("\n")] else: return [] # Before starting ceph-fuse process, note the contents of # /sys/fs/fuse/connections pre_mount_conns = list_connections() log.info("Pre-mount connections: {0}".format(pre_mount_conns)) proc = self.client_remote.run( args=run_cmd, logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)), stdin=run.PIPE, wait=False, ) self.fuse_daemon = proc # Wait for the connection reference to appear in /sys mount_wait = self.client_config.get('mount_wait', 0) if mount_wait > 0: log.info("Fuse mount waits {0} seconds before checking /sys/".format(mount_wait)) time.sleep(mount_wait) timeout = int(self.client_config.get('mount_timeout', 30)) waited = 0 post_mount_conns = list_connections() while len(post_mount_conns) <= len(pre_mount_conns): if self.fuse_daemon.finished: # Did mount fail? Raise the CommandFailedError instead of # hitting the "failed to populate /sys/" timeout self.fuse_daemon.wait() time.sleep(1) waited += 1 if waited > timeout: raise RuntimeError("Fuse mount failed to populate /sys/ after {0} seconds".format( waited )) else: post_mount_conns = list_connections() log.info("Post-mount connections: {0}".format(post_mount_conns)) # Record our fuse connection number so that we can use it when # forcing an unmount new_conns = list(set(post_mount_conns) - set(pre_mount_conns)) if len(new_conns) == 0: raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns)) elif len(new_conns) > 1: raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns)) else: self._fuse_conn = new_conns[0]
def start_rgw(ctx, config): """ Start rgw on remote sites. """ log.info('Starting rgw...') testdir = teuthology.get_testdir(ctx) for client in config.iterkeys(): (remote,) = ctx.cluster.only(client).remotes.iterkeys() client_config = config.get(client) if client_config is None: client_config = {} log.info("rgw %s config is %s", client, client_config) id_ = client.split('.', 1)[1] log.info('client {client} is id {id}'.format(client=client, id=id_)) cmd_prefix = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'daemon-helper', 'term', ] rgw_cmd = ['radosgw'] if ctx.rgw.frontend == 'apache': rgw_cmd.extend([ '--rgw-socket-path', '{tdir}/apache/tmp.{client}/fastcgi_sock/rgw_sock'.format( tdir=testdir, client=client, ), ]) elif ctx.rgw.frontend == 'civetweb': host, port = ctx.rgw.role_endpoints[client] rgw_cmd.extend([ '--rgw-frontends', 'civetweb port={port}'.format(port=port), ]) rgw_cmd.extend([ '-n', client, '-k', '/etc/ceph/ceph.{client}.keyring'.format(client=client), '--log-file', '/var/log/ceph/rgw.{client}.log'.format(client=client), '--rgw_ops_log_socket_path', '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, client=client), '--foreground', run.Raw('|'), 'sudo', 'tee', '/var/log/ceph/rgw.{client}.stdout'.format(tdir=testdir, client=client), run.Raw('2>&1'), ]) if client_config.get('valgrind'): cmd_prefix = teuthology.get_valgrind_args( testdir, client, cmd_prefix, client_config.get('valgrind') ) run_cmd = list(cmd_prefix) run_cmd.extend(rgw_cmd) ctx.daemons.add_daemon( remote, 'rgw', client, args=run_cmd, logger=log.getChild(client), stdin=run.PIPE, wait=False, ) try: yield finally: teuthology.stop_daemons_of_type(ctx, 'rgw') for client in config.iterkeys(): ctx.cluster.only(client).run( args=[ 'rm', '-f', '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, client=client), ], )
def run_daemon(ctx, config, type_):
    """
    Run daemons for a role type.  Handle the startup and termination of
    a daemon.  On startup -- set coverages, cpu_profile, valgrind values
    for all remotes, and a max_mds value for one mds.
    On cleanup -- Stop all existing daemons of this type.

    :param ctx: Context
    :param config: Configuration
    :param type_: Role type
    """
    cluster_name = config['cluster']
    log.info('Starting %s daemons in cluster %s...', type_, cluster_name)
    testdir = teuthology.get_testdir(ctx)
    daemons = ctx.cluster.only(teuthology.is_type(type_, cluster_name))

    # check whether any daemons of this type are configured
    if daemons is None:
        return
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    daemon_signal = 'kill'
    if config.get('coverage') or config.get('valgrind') is not None:
        daemon_signal = 'term'

    for remote, roles_for_host in daemons.remotes.iteritems():
        is_type_ = teuthology.is_type(type_, cluster_name)
        for role in roles_for_host:
            if not is_type_(role):
                continue
            _, _, id_ = teuthology.split_role(role)

            run_cmd = [
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'daemon-helper',
                daemon_signal,
            ]
            run_cmd_tail = [
                'ceph-%s' % (type_),
                '-f',
                '--cluster', cluster_name,
                '-i', id_
            ]

            if type_ in config.get('cpu_profile', []):
                profile_path = '/var/log/ceph/profiling-logger/%s.prof' % (role)
                run_cmd.extend(['env', 'CPUPROFILE=%s' % profile_path])

            if config.get('valgrind') is not None:
                valgrind_args = None
                if type_ in config['valgrind']:
                    valgrind_args = config['valgrind'][type_]
                if role in config['valgrind']:
                    valgrind_args = config['valgrind'][role]
                run_cmd = teuthology.get_valgrind_args(testdir, role,
                                                       run_cmd, valgrind_args)

            run_cmd.extend(run_cmd_tail)

            ctx.daemons.add_daemon(
                remote, type_, id_,
                cluster=cluster_name,
                args=run_cmd,
                logger=log.getChild(role),
                stdin=run.PIPE,
                wait=False,
            )

    try:
        yield
    finally:
        teuthology.stop_daemons_of_type(ctx, type_, cluster_name)
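The valgrind lookup above gives per-role settings precedence over per-type settings simply because the role key is checked last. A small, self-contained sketch of that precedence rule (the function name is hypothetical; the logic mirrors the loop body above):

def pick_valgrind_args(valgrind_cfg, type_, role):
    # A type-wide entry (e.g. 'osd') acts as a default; a role entry
    # (e.g. 'osd.1') overrides it because it is evaluated afterwards.
    valgrind_args = None
    if type_ in valgrind_cfg:
        valgrind_args = valgrind_cfg[type_]
    if role in valgrind_cfg:
        valgrind_args = valgrind_cfg[role]
    return valgrind_args

# pick_valgrind_args({'osd': ['--tool=memcheck'], 'osd.1': ['--tool=helgrind']},
#                    'osd', 'osd.1')  ->  ['--tool=helgrind']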
def _mount(self, mntopts, check_status):
    log.info("Client client.%s config is %s" % (self.client_id,
                                                self.client_config))

    daemon_signal = 'kill'
    if self.client_config.get('coverage') or \
       self.client_config.get('valgrind') is not None:
        daemon_signal = 'term'

    # Use 0000 mode to prevent undesired modifications to the mountpoint on
    # the local file system.
    script = f'mkdir -m 0000 -p -v {self.hostfs_mntpt}'.split()
    stderr = StringIO()
    try:
        self.client_remote.run(args=script, timeout=(15 * 60),
                               cwd=self.test_dir, stderr=stderr)
    except CommandFailedError:
        if 'file exists' not in stderr.getvalue().lower():
            raise

    run_cmd = [
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=self.test_dir),
        'daemon-helper',
        daemon_signal,
    ]

    fuse_cmd = [
        'ceph-fuse', "-f",
        "--admin-socket", "/var/run/ceph/$cluster-$name.$pid.asok",
    ]
    if self.client_id is not None:
        fuse_cmd += ['--id', self.client_id]
    if self.client_keyring_path and self.client_id is not None:
        fuse_cmd += ['-k', self.client_keyring_path]
    if self.cephfs_mntpt is not None:
        fuse_cmd += ["--client_mountpoint=" + self.cephfs_mntpt]
    if self.cephfs_name is not None:
        fuse_cmd += ["--client_fs=" + self.cephfs_name]
    if mntopts:
        fuse_cmd += mntopts
    fuse_cmd.append(self.hostfs_mntpt)

    cwd = self.test_dir
    if self.client_config.get('valgrind') is not None:
        run_cmd = misc.get_valgrind_args(
            self.test_dir,
            'client.{id}'.format(id=self.client_id),
            run_cmd,
            self.client_config.get('valgrind'),
        )
        cwd = None  # misc.get_valgrind_args chdir for us

    netns_prefix = [
        'sudo', 'nsenter',
        '--net=/var/run/netns/{0}'.format(self.netns_name)
    ]
    run_cmd = netns_prefix + run_cmd

    run_cmd.extend(fuse_cmd)

    def list_connections():
        conn_dir = "/sys/fs/fuse/connections"

        self.client_remote.run(args=['sudo', 'modprobe', 'fuse'],
                               check_status=False)
        self.client_remote.run(
            args=["sudo", "mount", "-t", "fusectl", conn_dir, conn_dir],
            check_status=False, timeout=(30))

        try:
            ls_str = self.client_remote.sh("ls " + conn_dir,
                                           stdout=StringIO(),
                                           timeout=(15 * 60)).strip()
        except CommandFailedError:
            return []

        if ls_str:
            return [int(n) for n in ls_str.split("\n")]
        else:
            return []

    # Before starting ceph-fuse process, note the contents of
    # /sys/fs/fuse/connections
    pre_mount_conns = list_connections()
    log.info("Pre-mount connections: {0}".format(pre_mount_conns))

    mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO()
    self.fuse_daemon = self.client_remote.run(
        args=run_cmd,
        cwd=cwd,
        logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)),
        stdin=run.PIPE,
        stdout=mountcmd_stdout,
        stderr=mountcmd_stderr,
        wait=False
    )

    # Wait for the connection reference to appear in /sys
    mount_wait = self.client_config.get('mount_wait', 0)
    if mount_wait > 0:
        log.info("Fuse mount waits {0} seconds before checking /sys/".format(
            mount_wait))
        time.sleep(mount_wait)
    timeout = int(self.client_config.get('mount_timeout', 30))
    waited = 0

    post_mount_conns = list_connections()
    while len(post_mount_conns) <= len(pre_mount_conns):
        if self.fuse_daemon.finished:
            # Did mount fail?  Raise the CommandFailedError instead of
            # hitting the "failed to populate /sys/" timeout
            try:
                self.fuse_daemon.wait()
            except CommandFailedError as e:
                log.info('mount command failed.')
                if check_status:
                    raise
                else:
                    return (e, mountcmd_stdout.getvalue(),
                            mountcmd_stderr.getvalue())
        time.sleep(1)
        waited += 1
        if waited > timeout:
            raise RuntimeError(
                "Fuse mount failed to populate /sys/ after {} "
                "seconds".format(waited))
        else:
            post_mount_conns = list_connections()
    log.info("Post-mount connections: {0}".format(post_mount_conns))

    # Record our fuse connection number so that we can use it when
    # forcing an unmount
    new_conns = list(set(post_mount_conns) - set(pre_mount_conns))
    if len(new_conns) == 0:
        raise RuntimeError(
            "New fuse connection directory not found ({0})".format(new_conns))
    elif len(new_conns) > 1:
        raise RuntimeError(
            "Unexpectedly numerous fuse connections {0}".format(new_conns))
    else:
        self._fuse_conn = new_conns[0]

    self.gather_mount_info()

    self.mounted = True
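The mount-detection logic above boils down to diffing the numeric entries under /sys/fs/fuse/connections before and after launching ceph-fuse; the single new entry is the connection id that is later used to force an abort on a hung unmount. A standalone sketch of that diff step, using only the checks already present above (the function name is hypothetical):

def new_fuse_connection(pre_mount_conns, post_mount_conns):
    # Exactly one new /sys/fs/fuse/connections entry is expected per mount.
    new_conns = sorted(set(post_mount_conns) - set(pre_mount_conns))
    if len(new_conns) == 0:
        raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns))
    elif len(new_conns) > 1:
        raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns))
    return new_conns[0]

# new_fuse_connection([38, 39], [38, 39, 40]) -> 40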
def run_daemon(ctx, config, type_):
    """
    Run daemons for a role type.  Handle the startup and termination of
    a daemon.  On startup -- set coverages, cpu_profile, valgrind values
    for all remotes, and a max_mds value for one mds.
    On cleanup -- Stop all existing daemons of this type.

    :param ctx: Context
    :param config: Configuration
    :param type_: Role type
    """
    log.info('Starting %s daemons...' % type_)
    testdir = teuthology.get_testdir(ctx)
    daemons = ctx.cluster.only(teuthology.is_type(type_))

    # check whether any daemons of this type are configured
    if daemons is None:
        return
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)

    daemon_signal = 'kill'
    if config.get('coverage') or config.get('valgrind') is not None:
        daemon_signal = 'term'

    num_active = 0
    for remote, roles_for_host in daemons.remotes.iteritems():
        for id_ in teuthology.roles_of_type(roles_for_host, type_):
            name = '%s.%s' % (type_, id_)

            if not (id_.endswith('-s')) and (id_.find('-s-') == -1):
                num_active += 1

            run_cmd = [
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                coverage_dir,
                'daemon-helper',
                daemon_signal,
            ]
            run_cmd_tail = ['ceph-%s' % (type_), '-f', '-i', id_]

            if type_ in config.get('cpu_profile', []):
                profile_path = '/var/log/ceph/profiling-logger/%s.%s.prof' % (
                    type_, id_)
                run_cmd.extend(['env', 'CPUPROFILE=%s' % profile_path])

            if config.get('valgrind') is not None:
                valgrind_args = None
                if type_ in config['valgrind']:
                    valgrind_args = config['valgrind'][type_]
                if name in config['valgrind']:
                    valgrind_args = config['valgrind'][name]
                run_cmd = teuthology.get_valgrind_args(testdir, name, run_cmd,
                                                       valgrind_args)

            run_cmd.extend(run_cmd_tail)

            ctx.daemons.add_daemon(
                remote, type_, id_,
                args=run_cmd,
                logger=log.getChild(name),
                stdin=run.PIPE,
                wait=False,
            )

    if type_ == 'mds':
        firstmon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
        mon0_remote.run(args=[
            'adjust-ulimits',
            'ceph-coverage',
            coverage_dir,
            'ceph',
            'mds', 'set_max_mds', str(num_active)
        ])

    try:
        yield
    finally:
        teuthology.stop_daemons_of_type(ctx, type_)
def task(ctx, config): """ Mount/unmount a ``ceph-fuse`` client. The config is optional and defaults to mounting on all clients. If a config is given, it is expected to be a list of clients to do this operation on. This lets you e.g. set up one client with ``ceph-fuse`` and another with ``kclient``. Example that mounts all clients:: tasks: - ceph: - ceph-fuse: - interactive: Example that uses both ``kclient` and ``ceph-fuse``:: tasks: - ceph: - ceph-fuse: [client.0] - kclient: [client.1] - interactive: Example that enables valgrind: tasks: - ceph: - ceph-fuse: client.0: valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] - interactive: """ log.info('Mounting ceph-fuse clients...') fuse_daemons = {} if config is None: config = dict(('client.{id}'.format(id=id_), None) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')) elif isinstance(config, list): config = dict((name, None) for name in config) overrides = ctx.config.get('overrides', {}) teuthology.deep_merge(config, overrides.get('ceph-fuse', {})) clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys())) for id_, remote in clients: mnt = os.path.join('/tmp/cephtest', 'mnt.{id}'.format(id=id_)) log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format( id=id_, remote=remote,mnt=mnt)) client_config = config.get("client.%s" % id_) if client_config is None: client_config = {} log.info("Client client.%s config is %s" % (id_, client_config)) daemon_signal = 'kill' if client_config.get('coverage') or client_config.get('valgrind') is not None: daemon_signal = 'term' remote.run( args=[ 'mkdir', '--', mnt, ], ) run_cmd=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', '/tmp/cephtest/archive/coverage', '/tmp/cephtest/daemon-helper', daemon_signal, ] run_cmd_tail=[ '/tmp/cephtest/binary/usr/local/bin/ceph-fuse', '-f', '--name', 'client.{id}'.format(id=id_), '-c', '/tmp/cephtest/ceph.conf', # TODO ceph-fuse doesn't understand dash dash '--', mnt, ] if client_config.get('valgrind') is not None: run_cmd.extend( teuthology.get_valgrind_args( 'client.{id}'.format(id=id_), client_config.get('valgrind'), ) ) run_cmd.extend(run_cmd_tail) proc = remote.run( args=run_cmd, logger=log.getChild('ceph-fuse.{id}'.format(id=id_)), stdin=run.PIPE, wait=False, ) fuse_daemons[id_] = proc for id_, remote in clients: mnt = os.path.join('/tmp/cephtest', 'mnt.{id}'.format(id=id_)) teuthology.wait_until_fuse_mounted( remote=remote, fuse=fuse_daemons[id_], mountpoint=mnt, ) remote.run(args=['sudo', 'chmod', '1777', '/tmp/cephtest/mnt.{id}'.format(id=id_)],) try: yield finally: log.info('Unmounting ceph-fuse clients...') for id_, remote in clients: mnt = os.path.join('/tmp/cephtest', 'mnt.{id}'.format(id=id_)) try: remote.run( args=[ 'fusermount', '-u', mnt, ], ) except CommandFailedError as e: log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=remote.name)) # abort the fuse mount, killing all hung processes remote.run( args=[ 'echo', '1', run.Raw('>'), run.Raw('/sys/fs/fuse/connections/*/abort'), ], ) # make sure its unmounted remote.run( args=[ 'sudo', 'umount', '-l', '-f', mnt, ], ) run.wait(fuse_daemons.itervalues()) for id_, remote in clients: mnt = os.path.join('/tmp/cephtest', 'mnt.{id}'.format(id=id_)) remote.run( args=[ 'rmdir', '--', mnt, ], )
def _mount(self, mount_path, mount_fs_name, mount_options): log.info("Client client.%s config is %s" % (self.client_id, self.client_config)) daemon_signal = 'kill' if self.client_config.get('coverage') or self.client_config.get( 'valgrind') is not None: daemon_signal = 'term' log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format( id=self.client_id, remote=self.client_remote, mnt=self.mountpoint)) self.client_remote.run(args=['mkdir', '-p', self.mountpoint], timeout=(15 * 60), cwd=self.test_dir) run_cmd = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=self.test_dir), 'daemon-helper', daemon_signal, ] fuse_cmd = ['ceph-fuse', "-f"] if mount_path is not None: fuse_cmd += ["--client_mountpoint={0}".format(mount_path)] if mount_fs_name is not None: fuse_cmd += ["--client_fs={0}".format(mount_fs_name)] fuse_cmd += mount_options fuse_cmd += [ '--name', 'client.{id}'.format(id=self.client_id), # TODO ceph-fuse doesn't understand dash dash '--', self.mountpoint, ] cwd = self.test_dir if self.client_config.get('valgrind') is not None: run_cmd = misc.get_valgrind_args( self.test_dir, 'client.{id}'.format(id=self.client_id), run_cmd, self.client_config.get('valgrind'), ) cwd = None # misc.get_valgrind_args chdir for us netns_prefix = [ 'sudo', 'nsenter', '--net=/var/run/netns/{0}'.format(self.netns_name) ] run_cmd = netns_prefix + run_cmd run_cmd.extend(fuse_cmd) def list_connections(): from teuthology.misc import get_system_type conn_dir = "/sys/fs/fuse/connections" self.client_remote.run(args=['sudo', 'modprobe', 'fuse'], check_status=False) self.client_remote.run( args=["sudo", "mount", "-t", "fusectl", conn_dir, conn_dir], check_status=False, timeout=(30)) try: ls_str = self.client_remote.sh("ls " + conn_dir, stdout=StringIO(), timeout=(15 * 60)).strip() except CommandFailedError: return [] if ls_str: return [int(n) for n in ls_str.split("\n")] else: return [] # Before starting ceph-fuse process, note the contents of # /sys/fs/fuse/connections pre_mount_conns = list_connections() log.info("Pre-mount connections: {0}".format(pre_mount_conns)) proc = self.client_remote.run( args=run_cmd, cwd=cwd, logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)), stdin=run.PIPE, wait=False, ) self.fuse_daemon = proc # Wait for the connection reference to appear in /sys mount_wait = self.client_config.get('mount_wait', 0) if mount_wait > 0: log.info( "Fuse mount waits {0} seconds before checking /sys/".format( mount_wait)) time.sleep(mount_wait) timeout = int(self.client_config.get('mount_timeout', 30)) waited = 0 post_mount_conns = list_connections() while len(post_mount_conns) <= len(pre_mount_conns): if self.fuse_daemon.finished: # Did mount fail? Raise the CommandFailedError instead of # hitting the "failed to populate /sys/" timeout self.fuse_daemon.wait() time.sleep(1) waited += 1 if waited > timeout: raise RuntimeError( "Fuse mount failed to populate /sys/ after {0} seconds". format(waited)) else: post_mount_conns = list_connections() log.info("Post-mount connections: {0}".format(post_mount_conns)) # Record our fuse connection number so that we can use it when # forcing an unmount new_conns = list(set(post_mount_conns) - set(pre_mount_conns)) if len(new_conns) == 0: raise RuntimeError( "New fuse connection directory not found ({0})".format( new_conns)) elif len(new_conns) > 1: raise RuntimeError( "Unexpectedly numerous fuse connections {0}".format(new_conns)) else: self._fuse_conn = new_conns[0] self.gather_mount_info() self.mounted = True
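Note how the netns wrapping in the variant above composes with the rest of the prefix: the nsenter part goes first, then the ulimit/coverage/daemon-helper prefix, then the ceph-fuse command. With made-up values for the netns name, client id and mountpoint (these are illustrative assumptions, not real paths), the command handed to the remote would look roughly like:

# sudo nsenter --net=/var/run/netns/ceph-ns-mnt.0 \
#     sudo adjust-ulimits ceph-coverage {tdir}/archive/coverage \
#     daemon-helper kill \
#     ceph-fuse -f --name client.0 -- /path/to/mnt.0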
def task(ctx, config): """ Mount/unmount a ``ceph-fuse`` client. The config is optional and defaults to mounting on all clients. If a config is given, it is expected to be a list of clients to do this operation on. This lets you e.g. set up one client with ``ceph-fuse`` and another with ``kclient``. Example that mounts all clients:: tasks: - ceph: - ceph-fuse: - interactive: Example that uses both ``kclient` and ``ceph-fuse``:: tasks: - ceph: - ceph-fuse: [client.0] - kclient: [client.1] - interactive: Example that enables valgrind: tasks: - ceph: - ceph-fuse: client.0: valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] - interactive: :param ctx: Context :param config: Configuration """ log.info("Mounting ceph-fuse clients...") fuse_daemons = {} testdir = teuthology.get_testdir(ctx) if config is None: config = dict( ("client.{id}".format(id=id_), None) for id_ in teuthology.all_roles_of_type(ctx.cluster, "client") ) elif isinstance(config, list): config = dict((name, None) for name in config) overrides = ctx.config.get("overrides", {}) teuthology.deep_merge(config, overrides.get("ceph-fuse", {})) clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys())) for id_, remote in clients: client_config = config.get("client.%s" % id_) if client_config is None: client_config = {} log.info("Client client.%s config is %s" % (id_, client_config)) daemon_signal = "kill" if client_config.get("coverage") or client_config.get("valgrind") is not None: daemon_signal = "term" mnt = os.path.join(testdir, "mnt.{id}".format(id=id_)) log.info("Mounting ceph-fuse client.{id} at {remote} {mnt}...".format(id=id_, remote=remote, mnt=mnt)) remote.run(args=["mkdir", "--", mnt]) run_cmd = [ "sudo", "adjust-ulimits", "ceph-coverage", "{tdir}/archive/coverage".format(tdir=testdir), "daemon-helper", daemon_signal, ] run_cmd_tail = [ "ceph-fuse", "-f", "--name", "client.{id}".format(id=id_), # TODO ceph-fuse doesn't understand dash dash '--', mnt, ] if client_config.get("valgrind") is not None: run_cmd = teuthology.get_valgrind_args( testdir, "client.{id}".format(id=id_), run_cmd, client_config.get("valgrind") ) run_cmd.extend(run_cmd_tail) proc = remote.run( args=run_cmd, logger=log.getChild("ceph-fuse.{id}".format(id=id_)), stdin=run.PIPE, wait=False ) fuse_daemons[id_] = proc for id_, remote in clients: mnt = os.path.join(testdir, "mnt.{id}".format(id=id_)) teuthology.wait_until_fuse_mounted(remote=remote, fuse=fuse_daemons[id_], mountpoint=mnt) remote.run(args=["sudo", "chmod", "1777", "{tdir}/mnt.{id}".format(tdir=testdir, id=id_)]) try: yield finally: log.info("Unmounting ceph-fuse clients...") for id_, remote in clients: mnt = os.path.join(testdir, "mnt.{id}".format(id=id_)) try: remote.run(args=["sudo", "fusermount", "-u", mnt]) except run.CommandFailedError: log.info("Failed to unmount ceph-fuse on {name}, aborting...".format(name=remote.name)) # abort the fuse mount, killing all hung processes remote.run( args=[ "if", "test", "-e", "/sys/fs/fuse/connections/*/abort", run.Raw(";"), "then", "echo", "1", run.Raw(">"), run.Raw("/sys/fs/fuse/connections/*/abort"), run.Raw(";"), "fi", ] ) # make sure its unmounted remote.run(args=["sudo", "umount", "-l", "-f", mnt]) run.wait(fuse_daemons.itervalues()) for id_, remote in clients: mnt = os.path.join(testdir, "mnt.{id}".format(id=id_)) remote.run(args=["rmdir", "--", mnt])
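The unmount fallback in the task above (and in the older variant further up) is worth calling out: when fusermount -u fails, the task writes 1 into the fuse connection's abort file and then does a lazy, forced umount. A condensed sketch of just that fallback, assuming the same Remote object and run.Raw helper used throughout this code; the wrapper function itself is hypothetical:

def force_abort_fuse(remote, mnt):
    # Mirrors the fallback above: abort the fuse connection(s) -- the glob in
    # the redirect target is expanded by the remote shell via run.Raw -- then
    # lazily force the unmount.
    remote.run(args=[
        'if', 'test', '-e', '/sys/fs/fuse/connections/*/abort', run.Raw(';'),
        'then', 'echo', '1', run.Raw('>'),
        run.Raw('/sys/fs/fuse/connections/*/abort'), run.Raw(';'), 'fi',
    ])
    remote.run(args=['sudo', 'umount', '-l', '-f', mnt])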
def start_rgw(ctx, config): """ Start rgw on remote sites. """ log.info('Starting rgw...') testdir = teuthology.get_testdir(ctx) for client in config.iterkeys(): (remote, ) = ctx.cluster.only(client).remotes.iterkeys() client_config = config.get(client) if client_config is None: client_config = {} log.info("rgw %s config is %s", client, client_config) id_ = client.split('.', 1)[1] log.info('client {client} is id {id}'.format(client=client, id=id_)) cmd_prefix = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'daemon-helper', 'term', ] rgw_cmd = ['radosgw'] if ctx.rgw.frontend == 'apache': rgw_cmd.extend([ '--rgw-socket-path', '{tdir}/apache/tmp.{client}/fastcgi_sock/rgw_sock'.format( tdir=testdir, client=client, ), ]) elif ctx.rgw.frontend == 'civetweb': host, port = ctx.rgw.role_endpoints[client] rgw_cmd.extend([ '--rgw-frontends', 'civetweb port={port}'.format(port=port), ]) rgw_cmd.extend([ '-n', client, '-k', '/etc/ceph/ceph.{client}.keyring'.format(client=client), '--log-file', '/var/log/ceph/rgw.{client}.log'.format(client=client), '--rgw_ops_log_socket_path', '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, client=client), '--foreground', run.Raw('|'), 'sudo', 'tee', '/var/log/ceph/rgw.{client}.stdout'.format(tdir=testdir, client=client), run.Raw('2>&1'), ]) if client_config.get('valgrind'): cmd_prefix = teuthology.get_valgrind_args( testdir, client, cmd_prefix, client_config.get('valgrind')) run_cmd = list(cmd_prefix) run_cmd.extend(rgw_cmd) ctx.daemons.add_daemon( remote, 'rgw', client, args=run_cmd, logger=log.getChild(client), stdin=run.PIPE, wait=False, ) try: yield finally: teuthology.stop_daemons_of_type(ctx, 'rgw') for client in config.iterkeys(): ctx.cluster.only(client).run(args=[ 'rm', '-f', '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, client=client), ], )
def mount(self): log.info("Client client.%s config is %s" % (self.client_id, self.client_config)) daemon_signal = 'kill' if self.client_config.get('coverage') or self.client_config.get( 'valgrind') is not None: daemon_signal = 'term' mnt = os.path.join(self.test_dir, 'mnt.{id}'.format(id=self.client_id)) log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format( id=self.client_id, remote=self.client_remote, mnt=mnt)) self.client_remote.run(args=[ 'mkdir', '--', mnt, ], ) run_cmd = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=self.test_dir), 'daemon-helper', daemon_signal, ] run_cmd_tail = [ 'ceph-fuse', '-f', '--name', 'client.{id}'.format(id=self.client_id), # TODO ceph-fuse doesn't understand dash dash '--', mnt, ] if self.client_config.get('valgrind') is not None: run_cmd = misc.get_valgrind_args( self.test_dir, 'client.{id}'.format(id=self.client_id), run_cmd, self.client_config.get('valgrind'), ) run_cmd.extend(run_cmd_tail) def list_connections(): self.client_remote.run(args=[ "sudo", "mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections" ], check_status=False) p = self.client_remote.run(args=["ls", "/sys/fs/fuse/connections"], stdout=StringIO(), check_status=False) if p.exitstatus != 0: return [] ls_str = p.stdout.getvalue().strip() if ls_str: return [int(n) for n in ls_str.split("\n")] else: return [] # Before starting ceph-fuse process, note the contents of # /sys/fs/fuse/connections pre_mount_conns = list_connections() log.info("Pre-mount connections: {0}".format(pre_mount_conns)) proc = self.client_remote.run( args=run_cmd, logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)), stdin=run.PIPE, wait=False, ) self.fuse_daemon = proc # Wait for the connection reference to appear in /sys mount_wait = self.client_config.get('mount_wait', 0) if mount_wait > 0: log.info( "Fuse mount waits {0} seconds before checking /sys/".format( mount_wait)) time.sleep(mount_wait) timeout = int(self.client_config.get('mount_timeout', 30)) waited = 0 post_mount_conns = list_connections() while len(post_mount_conns) <= len(pre_mount_conns): time.sleep(1) waited += 1 if waited > timeout: raise RuntimeError( "Fuse mount failed to populate /sys/ after {0} seconds". format(waited)) else: post_mount_conns = list_connections() log.info("Post-mount connections: {0}".format(post_mount_conns)) # Record our fuse connection number so that we can use it when # forcing an unmount new_conns = list(set(post_mount_conns) - set(pre_mount_conns)) if len(new_conns) == 0: raise RuntimeError( "New fuse connection directory not found ({0})".format( new_conns)) elif len(new_conns) > 1: raise RuntimeError( "Unexpectedly numerous fuse connections {0}".format(new_conns)) else: self._fuse_conn = new_conns[0]
def start_rgw(ctx, config, on_client = None, except_client = None): """ Start rgw on remote sites. """ log.info('Starting rgw...') log.debug('client %r', on_client) clients_to_run = [on_client] if on_client is None: clients_to_run = config.keys() testdir = teuthology.get_testdir(ctx) for client in clients_to_run: if client == except_client: continue (remote,) = ctx.cluster.only(client).remotes.iterkeys() zone = rgw_utils.zone_for_client(ctx, client) log.debug('zone %s', zone) client_config = config.get(client) if client_config is None: client_config = {} log.info("rgw %s config is %s", client, client_config) id_ = client.split('.', 1)[1] log.info('client {client} is id {id}'.format(client=client, id=id_)) cmd_prefix = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'daemon-helper', 'term', ] rgw_cmd = ['radosgw'] if ctx.rgw.frontend == 'apache': if ctx.rgw.use_fastcgi or _use_uds_with_fcgi(remote): rgw_cmd.extend([ '--rgw-socket-path', '{tdir}/apache/tmp.{client}/fastcgi_sock/rgw_sock'.format( tdir=testdir, client=client, ), '--rgw-frontends', 'fastcgi', ]) else: # for mod_proxy_fcgi, using tcp rgw_cmd.extend([ '--rgw-socket-path', '', '--rgw-print-continue', 'false', '--rgw-frontends', 'fastcgi socket_port=9000 socket_host=0.0.0.0', ]) elif ctx.rgw.frontend == 'civetweb': host, port = ctx.rgw.role_endpoints[client] rgw_cmd.extend([ '--rgw-frontends', 'civetweb port={port}'.format(port=port), ]) if zone is not None: rgw_cmd.extend(['--rgw-zone', zone]) rgw_cmd.extend([ '-n', client, '-k', '/etc/ceph/ceph.{client}.keyring'.format(client=client), '--log-file', '/var/log/ceph/rgw.{client}.log'.format(client=client), '--rgw_ops_log_socket_path', '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, client=client), '--foreground', run.Raw('|'), 'sudo', 'tee', '/var/log/ceph/rgw.{client}.stdout'.format(tdir=testdir, client=client), run.Raw('2>&1'), ]) if client_config.get('valgrind'): cmd_prefix = teuthology.get_valgrind_args( testdir, client, cmd_prefix, client_config.get('valgrind') ) run_cmd = list(cmd_prefix) run_cmd.extend(rgw_cmd) ctx.daemons.add_daemon( remote, 'rgw', client, args=run_cmd, logger=log.getChild(client), stdin=run.PIPE, wait=False, ) try: yield finally: teuthology.stop_daemons_of_type(ctx, 'rgw') for client in config.iterkeys(): ctx.cluster.only(client).run( args=[ 'rm', '-f', '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, client=client), ], )
def start_rgw(ctx, config): log.info('Starting rgw...') testdir = teuthology.get_testdir(ctx) for client in config.iterkeys(): (remote,) = ctx.cluster.only(client).remotes.iterkeys() client_config = config.get(client) if client_config is None: client_config = {} log.info("rgw %s config is %s", client, client_config) id_ = client.split('.', 1)[1] log.info('client {client} is id {id}'.format(client=client, id=id_)) run_cmd=[ 'sudo', '{tdir}/adjust-ulimits'.format(tdir=testdir), 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), '{tdir}/daemon-helper'.format(tdir=testdir), 'term', ] run_cmd_tail=[ 'radosgw', '-n', client, '-k', '/etc/ceph/ceph.{client}.keyring'.format(client=client), '--log-file', '/var/log/ceph/rgw.{client}.log'.format(client=client), '--rgw_ops_log_socket_path', '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, client=client), '{tdir}/apache/apache.{client}.conf'.format(tdir=testdir, client=client), '--foreground', run.Raw('|'), 'sudo', 'tee', '/var/log/ceph/rgw.{client}.stdout'.format(tdir=testdir, client=client), run.Raw('2>&1'), ] run_cmd.extend( teuthology.get_valgrind_args( testdir, client, client_config.get('valgrind') ) ) run_cmd.extend(run_cmd_tail) ctx.daemons.add_daemon( remote, 'rgw', client, args=run_cmd, logger=log.getChild(client), stdin=run.PIPE, wait=False, ) try: yield finally: teuthology.stop_daemons_of_type(ctx, 'rgw') for client in config.iterkeys(): ctx.cluster.only(client).run( args=[ 'rm', '-f', '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, client=client), ], )
def mount(self): log.info("Client client.%s config is %s" % (self.client_id, self.client_config)) daemon_signal = 'kill' if self.client_config.get('coverage') or self.client_config.get('valgrind') is not None: daemon_signal = 'term' mnt = os.path.join(self.test_dir, 'mnt.{id}'.format(id=self.client_id)) log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format( id=self.client_id, remote=self.client_remote, mnt=mnt)) self.client_remote.run( args=[ 'mkdir', '--', mnt, ], ) run_cmd = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=self.test_dir), 'daemon-helper', daemon_signal, ] run_cmd_tail = [ 'ceph-fuse', '-f', '--name', 'client.{id}'.format(id=self.client_id), # TODO ceph-fuse doesn't understand dash dash '--', mnt, ] if self.client_config.get('valgrind') is not None: run_cmd = misc.get_valgrind_args( self.test_dir, 'client.{id}'.format(id=self.client_id), run_cmd, self.client_config.get('valgrind'), ) run_cmd.extend(run_cmd_tail) def list_connections(): self.client_remote.run( args=["sudo", "mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"], check_status=False ) p = self.client_remote.run( args=["ls", "/sys/fs/fuse/connections"], stdout=StringIO(), check_status=False ) if p.exitstatus != 0: return [] ls_str = p.stdout.getvalue().strip() if ls_str: return [int(n) for n in ls_str.split("\n")] else: return [] # Before starting ceph-fuse process, note the contents of # /sys/fs/fuse/connections pre_mount_conns = list_connections() log.info("Pre-mount connections: {0}".format(pre_mount_conns)) proc = self.client_remote.run( args=run_cmd, logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)), stdin=run.PIPE, wait=False, ) self.fuse_daemon = proc # Wait for the connection reference to appear in /sys waited = 0 while list_connections() == pre_mount_conns: time.sleep(1) waited += 1 if waited > 30: raise RuntimeError("Fuse mount failed to populate /sys/ after {0} seconds".format( waited )) post_mount_conns = list_connections() log.info("Post-mount connections: {0}".format(post_mount_conns)) # Record our fuse connection number so that we can use it when # forcing an unmount new_conns = list(set(post_mount_conns) - set(pre_mount_conns)) if len(new_conns) == 0: raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns)) elif len(new_conns) > 1: raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns)) else: self._fuse_conn = new_conns[0]
def start_rgw(ctx, config, clients): """ Start rgw on remote sites. """ log.info('Starting rgw...') testdir = teuthology.get_testdir(ctx) for client in clients: (remote,) = ctx.cluster.only(client).remotes.iterkeys() cluster_name, daemon_type, client_id = teuthology.split_role(client) client_with_id = daemon_type + '.' + client_id client_with_cluster = cluster_name + '.' + client_with_id client_config = config.get(client) if client_config is None: client_config = {} log.info("rgw %s config is %s", client, client_config) cmd_prefix = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'daemon-helper', 'term', ] rgw_cmd = ['radosgw'] log.info("Using %s as radosgw frontend", ctx.rgw.frontend) endpoint = ctx.rgw.role_endpoints[client] frontends = ctx.rgw.frontend frontend_prefix = client_config.get('frontend_prefix', None) if frontend_prefix: frontends += ' prefix={pfx}'.format(pfx=frontend_prefix) if endpoint.cert: # add the ssl certificate path frontends += ' ssl_certificate={}'.format(endpoint.cert.certificate) if ctx.rgw.frontend == 'civetweb': frontends += ' port={}s'.format(endpoint.port) else: frontends += ' ssl_port={}'.format(endpoint.port) else: frontends += ' port={}'.format(endpoint.port) rgw_cmd.extend([ '--rgw-frontends', frontends, '-n', client_with_id, '--cluster', cluster_name, '-k', '/etc/ceph/{client_with_cluster}.keyring'.format(client_with_cluster=client_with_cluster), '--log-file', '/var/log/ceph/rgw.{client_with_cluster}.log'.format(client_with_cluster=client_with_cluster), '--rgw_ops_log_socket_path', '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format(tdir=testdir, client_with_cluster=client_with_cluster) ]) keystone_role = client_config.get('use-keystone-role', None) if keystone_role is not None: if not ctx.keystone: raise ConfigError('rgw must run after the keystone task') url = 'http://{host}:{port}/v1/KEY_$(tenant_id)s'.format(host=endpoint.hostname, port=endpoint.port) ctx.keystone.create_endpoint(ctx, keystone_role, 'swift', url) keystone_host, keystone_port = \ ctx.keystone.public_endpoints[keystone_role] rgw_cmd.extend([ '--rgw_keystone_url', 'http://{khost}:{kport}'.format(khost=keystone_host, kport=keystone_port), ]) rgw_cmd.extend([ '--foreground', run.Raw('|'), 'sudo', 'tee', '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format(tdir=testdir, client_with_cluster=client_with_cluster), run.Raw('2>&1'), ]) if client_config.get('valgrind'): cmd_prefix = teuthology.get_valgrind_args( testdir, client_with_cluster, cmd_prefix, client_config.get('valgrind') ) run_cmd = list(cmd_prefix) run_cmd.extend(rgw_cmd) ctx.daemons.add_daemon( remote, 'rgw', client_with_id, cluster=cluster_name, args=run_cmd, logger=log.getChild(client), stdin=run.PIPE, wait=False, ) # XXX: add_daemon() doesn't let us wait until radosgw finishes startup for client in clients: endpoint = ctx.rgw.role_endpoints[client] url = endpoint.url() log.info('Polling {client} until it starts accepting connections on {url}'.format(client=client, url=url)) wait_for_radosgw(url) try: yield finally: for client in clients: cluster_name, daemon_type, client_id = teuthology.split_role(client) client_with_id = daemon_type + '.' + client_id client_with_cluster = cluster_name + '.' + client_with_id ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).stop() ctx.cluster.only(client).run( args=[ 'rm', '-f', '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, client=client_with_cluster), ], )
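For readers following the naming above: teuthology.split_role() breaks a role string into cluster, daemon type and id, and the two derived names are used consistently -- client_with_id for the daemon identity (-n, add_daemon), client_with_cluster for on-disk artifacts (keyring, logs, sockets). Illustrative values, assuming the default cluster name is 'ceph':

# role = 'client.0'
# cluster_name, daemon_type, client_id = teuthology.split_role(role)
#   -> ('ceph', 'client', '0')           # cluster defaults to 'ceph'
# client_with_id      = 'client.0'       # passed to -n and add_daemon()
# client_with_cluster = 'ceph.client.0'  # keyring, log file, opslog socket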
def start_rgw(ctx, config, on_client=None, except_client=None): """ Start rgw on remote sites. """ log.info('Starting rgw...') log.debug('client %r', on_client) clients_to_run = [on_client] if on_client is None: clients_to_run = config.keys() log.debug('client %r', clients_to_run) testdir = teuthology.get_testdir(ctx) for client in clients_to_run: if client == except_client: continue (remote, ) = ctx.cluster.only(client).remotes.iterkeys() cluster_name, daemon_type, client_id = teuthology.split_role(client) client_with_id = daemon_type + '.' + client_id client_with_cluster = cluster_name + '.' + client_with_id zone = rgw_utils.zone_for_client(ctx, client) log.debug('zone %s', zone) client_config = config.get(client) if client_config is None: client_config = {} log.info("rgw %s config is %s", client, client_config) id_ = client.split('.', 1)[1] log.info('client {client} is id {id}'.format(client=client, id=id_)) cmd_prefix = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'daemon-helper', 'term', ] rgw_cmd = ['radosgw'] if ctx.rgw.frontend == 'apache': if ctx.rgw.use_fastcgi or _use_uds_with_fcgi(remote): rgw_cmd.extend([ '--rgw-socket-path', '{tdir}/apache/tmp.{client_with_cluster}/fastcgi_sock/rgw_sock' .format( tdir=testdir, client_with_cluster=client_with_cluster, ), '--rgw-frontends', 'fastcgi', ]) else: # for mod_proxy_fcgi, using tcp rgw_cmd.extend([ '--rgw-socket-path', '', '--rgw-print-continue', 'false', '--rgw-frontends', 'fastcgi socket_port=9000 socket_host=0.0.0.0', ]) elif ctx.rgw.frontend == 'civetweb': host, port = ctx.rgw.role_endpoints[client] rgw_cmd.extend([ '--rgw-frontends', 'civetweb port={port}'.format(port=port), ]) if zone is not None: rgw_cmd.extend(['--rgw-zone', zone]) rgw_cmd.extend([ '-n', client_with_id, '--cluster', cluster_name, '-k', '/etc/ceph/{client_with_cluster}.keyring'.format( client_with_cluster=client_with_cluster), '--log-file', '/var/log/ceph/rgw.{client_with_cluster}.log'.format( client_with_cluster=client_with_cluster), '--rgw_ops_log_socket_path', '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format( tdir=testdir, client_with_cluster=client_with_cluster), '--foreground', run.Raw('|'), 'sudo', 'tee', '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format( tdir=testdir, client_with_cluster=client_with_cluster), run.Raw('2>&1'), ]) if client_config.get('valgrind'): cmd_prefix = teuthology.get_valgrind_args( testdir, client, cmd_prefix, client_config.get('valgrind')) run_cmd = list(cmd_prefix) run_cmd.extend(rgw_cmd) ctx.daemons.add_daemon( remote, 'rgw', client, cluster=cluster_name, args=run_cmd, logger=log.getChild(client), stdin=run.PIPE, wait=False, ) # XXX: add_daemon() doesn't let us wait until radosgw finishes startup # use a connection pool with retry/backoff to poll each gateway until it starts listening http = PoolManager(retries=Retry(connect=8, backoff_factor=1)) for client in clients_to_run: if client == except_client: continue host, port = ctx.rgw.role_endpoints[client] endpoint = 'http://{host}:{port}/'.format(host=host, port=port) log.info( 'Polling {client} until it starts accepting connections on {endpoint}' .format(client=client, endpoint=endpoint)) http.request('GET', endpoint) try: yield finally: teuthology.stop_daemons_of_type(ctx, 'rgw') for client in config.iterkeys(): ctx.cluster.only(client).run(args=[ 'rm', '-f', '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format( tdir=testdir, client_with_cluster=client_with_cluster), ], )
def start_rgw(ctx, config, clients): """ Start rgw on remote sites. """ log.info('Starting rgw...') testdir = teuthology.get_testdir(ctx) for client in clients: (remote,) = ctx.cluster.only(client).remotes.iterkeys() cluster_name, daemon_type, client_id = teuthology.split_role(client) client_with_id = daemon_type + '.' + client_id client_with_cluster = cluster_name + '.' + client_with_id client_config = config.get(client) if client_config is None: client_config = {} log.info("rgw %s config is %s", client, client_config) cmd_prefix = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'daemon-helper', 'term', ] rgw_cmd = ['radosgw'] log.info("Using %s as radosgw frontend", ctx.rgw.frontend) host, port = ctx.rgw.role_endpoints[client] frontends = \ '{frontend} port={port}'.format(frontend=ctx.rgw.frontend, port=port) frontend_prefix = client_config.get('frontend_prefix', None) if frontend_prefix: frontends += ' prefix={pfx}'.format(pfx=frontend_prefix) rgw_cmd.extend([ '--rgw-frontends', frontends, '-n', client_with_id, '--cluster', cluster_name, '-k', '/etc/ceph/{client_with_cluster}.keyring'.format(client_with_cluster=client_with_cluster), '--log-file', '/var/log/ceph/rgw.{client_with_cluster}.log'.format(client_with_cluster=client_with_cluster), '--rgw_ops_log_socket_path', '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format(tdir=testdir, client_with_cluster=client_with_cluster) ]) keystone_role = client_config.get('use-keystone-role', None) if keystone_role is not None: if not ctx.keystone: raise ConfigError('rgw must run after the keystone task') url = 'http://{host}:{port}/v1/KEY_$(tenant_id)s'.format(host=host, port=port) ctx.keystone.create_endpoint(ctx, keystone_role, 'swift', url) keystone_host, keystone_port = \ ctx.keystone.public_endpoints[keystone_role] rgw_cmd.extend([ '--rgw_keystone_url', 'http://{khost}:{kport}'.format(khost=keystone_host, kport=keystone_port), ]) rgw_cmd.extend([ '--foreground', run.Raw('|'), 'sudo', 'tee', '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format(tdir=testdir, client_with_cluster=client_with_cluster), run.Raw('2>&1'), ]) if client_config.get('valgrind'): cmd_prefix = teuthology.get_valgrind_args( testdir, client_with_cluster, cmd_prefix, client_config.get('valgrind') ) run_cmd = list(cmd_prefix) run_cmd.extend(rgw_cmd) ctx.daemons.add_daemon( remote, 'rgw', client_with_id, cluster=cluster_name, args=run_cmd, logger=log.getChild(client), stdin=run.PIPE, wait=False, ) # XXX: add_daemon() doesn't let us wait until radosgw finishes startup for client in config.keys(): host, port = ctx.rgw.role_endpoints[client] endpoint = 'http://{host}:{port}/'.format(host=host, port=port) log.info('Polling {client} until it starts accepting connections on {endpoint}'.format(client=client, endpoint=endpoint)) wait_for_radosgw(endpoint) try: yield finally: for client in config.iterkeys(): cluster_name, daemon_type, client_id = teuthology.split_role(client) client_with_id = daemon_type + '.' + client_id client_with_cluster = cluster_name + '.' + client_with_id ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).stop() ctx.cluster.only(client).run( args=[ 'rm', '-f', '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, client=client_with_cluster), ], )
def run_daemon(ctx, config, type_):
    """
    Run daemons for a role type.  Handle the startup and termination of
    a daemon.  On startup -- set coverages, cpu_profile, valgrind values
    for all remotes, and a max_mds value for one mds.
    On cleanup -- Stop all existing daemons of this type.

    :param ctx: Context
    :param config: Configuration
    :param type_: Role type
    """
    cluster_name = config["cluster"]
    log.info("Starting %s daemons in cluster %s...", type_, cluster_name)
    testdir = teuthology.get_testdir(ctx)
    daemons = ctx.cluster.only(teuthology.is_type(type_, cluster_name))

    # check whether any daemons of this type are configured
    if daemons is None:
        return
    coverage_dir = "{tdir}/archive/coverage".format(tdir=testdir)

    daemon_signal = "kill"
    if config.get("coverage") or config.get("valgrind") is not None:
        daemon_signal = "term"

    for remote, roles_for_host in daemons.remotes.iteritems():
        is_type_ = teuthology.is_type(type_, cluster_name)
        for role in roles_for_host:
            if not is_type_(role):
                continue
            _, _, id_ = teuthology.split_role(role)

            run_cmd = ["sudo", "adjust-ulimits", "ceph-coverage", coverage_dir, "daemon-helper", daemon_signal]
            run_cmd_tail = ["ceph-%s" % (type_), "-f", "--cluster", cluster_name, "-i", id_]

            if type_ in config.get("cpu_profile", []):
                profile_path = "/var/log/ceph/profiling-logger/%s.prof" % (role)
                run_cmd.extend(["env", "CPUPROFILE=%s" % profile_path])

            if config.get("valgrind") is not None:
                valgrind_args = None
                if type_ in config["valgrind"]:
                    valgrind_args = config["valgrind"][type_]
                if role in config["valgrind"]:
                    valgrind_args = config["valgrind"][role]
                run_cmd = teuthology.get_valgrind_args(testdir, role, run_cmd, valgrind_args)

            run_cmd.extend(run_cmd_tail)

            ctx.daemons.add_daemon(
                remote, type_, id_,
                cluster=cluster_name,
                args=run_cmd,
                logger=log.getChild(role),
                stdin=run.PIPE,
                wait=False,
            )

    try:
        yield
    finally:
        teuthology.stop_daemons_of_type(ctx, type_, cluster_name)
def run_daemon(ctx, config, type_): """ Run daemons for a role type. Handle the startup and termination of a a daemon. On startup -- set coverages, cpu_profile, valgrind values for all remotes, and a max_mds value for one mds. On cleanup -- Stop all existing daemons of this type. :param ctx: Context :param config: Configuration :paran type_: Role type """ log.info('Starting %s daemons...' % type_) testdir = teuthology.get_testdir(ctx) daemons = ctx.cluster.only(teuthology.is_type(type_)) # check whether any daemons if this type are configured if daemons is None: return coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) daemon_signal = 'kill' if config.get('coverage') or config.get('valgrind') is not None: daemon_signal = 'term' for remote, roles_for_host in daemons.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, type_): name = '%s.%s' % (type_, id_) run_cmd = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'daemon-helper', daemon_signal, ] run_cmd_tail = [ 'ceph-%s' % (type_), '-f', '-i', id_] if type_ in config.get('cpu_profile', []): profile_path = '/var/log/ceph/profiling-logger/%s.%s.prof' % (type_, id_) run_cmd.extend([ 'env', 'CPUPROFILE=%s' % profile_path ]) if config.get('valgrind') is not None: valgrind_args = None if type_ in config['valgrind']: valgrind_args = config['valgrind'][type_] if name in config['valgrind']: valgrind_args = config['valgrind'][name] run_cmd = teuthology.get_valgrind_args(testdir, name, run_cmd, valgrind_args) run_cmd.extend(run_cmd_tail) ctx.daemons.add_daemon(remote, type_, id_, args=run_cmd, logger=log.getChild(name), stdin=run.PIPE, wait=False, ) try: yield finally: teuthology.stop_daemons_of_type(ctx, type_)
def start_rgw(ctx, config, clients): """ Start rgw on remote sites. """ log.info('Starting rgw...') testdir = teuthology.get_testdir(ctx) for client in clients: (remote, ) = ctx.cluster.only(client).remotes.iterkeys() cluster_name, daemon_type, client_id = teuthology.split_role(client) client_with_id = daemon_type + '.' + client_id client_with_cluster = cluster_name + '.' + client_with_id client_config = config.get(client) if client_config is None: client_config = {} log.info("rgw %s config is %s", client, client_config) cmd_prefix = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'daemon-helper', 'term', ] rgw_cmd = ['radosgw'] log.info("Using %s as radosgw frontend", ctx.rgw.frontend) host, port = ctx.rgw.role_endpoints[client] rgw_cmd.extend([ '--rgw-frontends', '{frontend} port={port}'.format(frontend=ctx.rgw.frontend, port=port), '-n', client_with_id, '--cluster', cluster_name, '-k', '/etc/ceph/{client_with_cluster}.keyring'.format( client_with_cluster=client_with_cluster), '--log-file', '/var/log/ceph/rgw.{client_with_cluster}.log'.format( client_with_cluster=client_with_cluster), '--rgw_ops_log_socket_path', '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format( tdir=testdir, client_with_cluster=client_with_cluster), '--foreground', run.Raw('|'), 'sudo', 'tee', '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format( tdir=testdir, client_with_cluster=client_with_cluster), run.Raw('2>&1'), ]) if client_config.get('valgrind'): cmd_prefix = teuthology.get_valgrind_args( testdir, client_with_cluster, cmd_prefix, client_config.get('valgrind')) run_cmd = list(cmd_prefix) run_cmd.extend(rgw_cmd) ctx.daemons.add_daemon( remote, 'rgw', client_with_id, cluster=cluster_name, args=run_cmd, logger=log.getChild(client), stdin=run.PIPE, wait=False, ) # XXX: add_daemon() doesn't let us wait until radosgw finishes startup for client in config.keys(): host, port = ctx.rgw.role_endpoints[client] endpoint = 'http://{host}:{port}/'.format(host=host, port=port) log.info( 'Polling {client} until it starts accepting connections on {endpoint}' .format(client=client, endpoint=endpoint)) wait_for_radosgw(endpoint) try: yield finally: for client in config.iterkeys(): cluster_name, daemon_type, client_id = teuthology.split_role( client) client_with_id = daemon_type + '.' + client_id client_with_cluster = cluster_name + '.' + client_with_id ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).stop() ctx.cluster.only(client).run(args=[ 'rm', '-f', '{tdir}/rgw.opslog.{client}.sock'.format( tdir=testdir, client=client_with_cluster), ], )
def run_daemon(ctx, config, type_): log.info('Starting %s daemons...' % type_) daemons = ctx.cluster.only(teuthology.is_type(type_)) coverage_dir = '/tmp/cephtest/archive/coverage' daemon_signal = 'kill' if config.get('coverage') or config.get('valgrind') is not None: daemon_signal = 'term' num_active = 0 for remote, roles_for_host in daemons.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, type_): name = '%s.%s' % (type_, id_) if not id_.endswith('-s'): num_active += 1 run_cmd = [ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/daemon-helper', daemon_signal, ] run_cmd_tail = [ '/tmp/cephtest/binary/usr/local/bin/ceph-%s' % type_, '-f', '-i', id_, '-c', '/tmp/cephtest/ceph.conf'] if config.get('valgrind') is not None: valgrind_args = None if type_ in config['valgrind']: valgrind_args = config['valgrind'][type_] if name in config['valgrind']: valgrind_args = config['valgrind'][name] run_cmd.extend(teuthology.get_valgrind_args(name, valgrind_args)) if type_ in config.get('cpu_profile', []): profile_path = '/tmp/cephtest/archive/log/%s.%s.prof' % (type_, id_) run_cmd.extend([ 'env', 'CPUPROFILE=%s' % profile_path ]) run_cmd.extend(run_cmd_tail) ctx.daemons.add_daemon(remote, type_, id_, args=run_cmd, logger=log.getChild(name), stdin=run.PIPE, wait=False, ) if type_ == 'mds': firstmon = teuthology.get_first_mon(ctx, config) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() mon0_remote.run(args=[ '/tmp/cephtest/enable-coredump', '/tmp/cephtest/binary/usr/local/bin/ceph-coverage', coverage_dir, '/tmp/cephtest/binary/usr/local/bin/ceph', '-c', '/tmp/cephtest/ceph.conf', 'mds', 'set_max_mds', str(num_active)]) try: yield finally: log.info('Shutting down %s daemons...' % type_) exc_info = (None, None, None) for daemon in ctx.daemons.iter_daemons_of_role(type_): try: daemon.stop() except (run.CommandFailedError, run.CommandCrashedError, run.ConnectionLostError): exc_info = sys.exc_info() log.exception('Saw exception from %s.%s', daemon.role, daemon.id_) if exc_info != (None, None, None): raise exc_info[0], exc_info[1], exc_info[2]
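The finally: block in this older run_daemon() stops every daemon even when one of them fails, remembers the failure via sys.exc_info(), and re-raises it after the loop using the Python 2 three-argument raise. A rough, self-contained sketch of that pattern (the _FakeDaemon class is invented for the demo, and the plain re-raise here drops the saved traceback that the original preserves):

import sys


class _FakeDaemon(object):
    """Invented for this demo; stands in for a teuthology daemon handle."""
    def __init__(self, ok):
        self.ok = ok
        self.stopped = False

    def stop(self):
        self.stopped = True
        if not self.ok:
            raise RuntimeError('stop failed')


def stop_all(daemons):
    # stop everything, remember the last failure, re-raise it afterwards
    exc_info = (None, None, None)
    for daemon in daemons:
        try:
            daemon.stop()
        except RuntimeError:
            exc_info = sys.exc_info()
    if exc_info != (None, None, None):
        raise exc_info[1]


daemons = [_FakeDaemon(True), _FakeDaemon(False), _FakeDaemon(True)]
try:
    stop_all(daemons)
except RuntimeError:
    pass
assert all(d.stopped for d in daemons)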
def start_rgw(ctx, config, clients): """ Start rgw on remote sites. """ log.info('Starting rgw...') testdir = teuthology.get_testdir(ctx) for client in clients: (remote, ) = ctx.cluster.only(client).remotes.keys() cluster_name, daemon_type, client_id = teuthology.split_role(client) client_with_id = daemon_type + '.' + client_id client_with_cluster = cluster_name + '.' + client_with_id client_config = config.get(client) if client_config is None: client_config = {} log.info("rgw %s config is %s", client, client_config) cmd_prefix = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'daemon-helper', 'term', ] rgw_cmd = ['radosgw'] log.info("Using %s as radosgw frontend", ctx.rgw.frontend) endpoint = ctx.rgw.role_endpoints[client] frontends = ctx.rgw.frontend frontend_prefix = client_config.get('frontend_prefix', None) if frontend_prefix: frontends += ' prefix={pfx}'.format(pfx=frontend_prefix) if endpoint.cert: # add the ssl certificate path frontends += ' ssl_certificate={}'.format( endpoint.cert.certificate) if ctx.rgw.frontend == 'civetweb': frontends += ' port={}s'.format(endpoint.port) else: frontends += ' ssl_port={}'.format(endpoint.port) else: frontends += ' port={}'.format(endpoint.port) rgw_cmd.extend([ '--rgw-frontends', frontends, '-n', client_with_id, '--cluster', cluster_name, '-k', '/etc/ceph/{client_with_cluster}.keyring'.format( client_with_cluster=client_with_cluster), '--log-file', '/var/log/ceph/rgw.{client_with_cluster}.log'.format( client_with_cluster=client_with_cluster), '--rgw_ops_log_socket_path', '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format( tdir=testdir, client_with_cluster=client_with_cluster), ]) keystone_role = client_config.get('use-keystone-role', None) if keystone_role is not None: if not ctx.keystone: raise ConfigError('rgw must run after the keystone task') url = 'http://{host}:{port}/v1/KEY_$(tenant_id)s'.format( host=endpoint.hostname, port=endpoint.port) ctx.keystone.create_endpoint(ctx, keystone_role, 'swift', url) keystone_host, keystone_port = \ ctx.keystone.public_endpoints[keystone_role] rgw_cmd.extend([ '--rgw_keystone_url', 'http://{khost}:{kport}'.format(khost=keystone_host, kport=keystone_port), ]) if client_config.get('dns-name') is not None: rgw_cmd.extend(['--rgw-dns-name', endpoint.dns_name]) if client_config.get('dns-s3website-name') is not None: rgw_cmd.extend( ['--rgw-dns-s3website-name', endpoint.website_dns_name]) vault_role = client_config.get('use-vault-role', None) barbican_role = client_config.get('use-barbican-role', None) token_path = teuthology.get_testdir(ctx) + '/vault-token' if barbican_role is not None: if not hasattr(ctx, 'barbican'): raise ConfigError('rgw must run after the barbican task') barbican_host, barbican_port = \ ctx.barbican.endpoints[barbican_role] log.info("Use barbican url=%s:%s", barbican_host, barbican_port) rgw_cmd.extend([ '--rgw_barbican_url', 'http://{bhost}:{bport}'.format(bhost=barbican_host, bport=barbican_port), ]) elif vault_role is not None: if not ctx.vault.root_token: raise ConfigError('vault: no "root_token" specified') # create token on file ctx.cluster.only(client).run(args=[ 'echo', '-n', ctx.vault.root_token, run.Raw('>'), token_path ]) log.info("Token file content") ctx.cluster.only(client).run(args=['cat', token_path]) log.info("Restrict access to token file") ctx.cluster.only(client).run(args=['chmod', '600', token_path]) ctx.cluster.only(client).run( args=['sudo', 'chown', 'ceph', token_path]) rgw_cmd.extend([ '--rgw_crypt_vault_addr', 
"{}:{}".format(*ctx.vault.endpoints[vault_role]), '--rgw_crypt_vault_token_file', token_path ]) rgw_cmd.extend([ '--foreground', run.Raw('|'), 'sudo', 'tee', '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format( client_with_cluster=client_with_cluster), run.Raw('2>&1'), ]) if client_config.get('valgrind'): cmd_prefix = teuthology.get_valgrind_args( testdir, client_with_cluster, cmd_prefix, client_config.get('valgrind'), # see https://github.com/ceph/teuthology/pull/1600 exit_on_first_error=False) run_cmd = list(cmd_prefix) run_cmd.extend(rgw_cmd) ctx.daemons.add_daemon( remote, 'rgw', client_with_id, cluster=cluster_name, args=run_cmd, logger=log.getChild(client), stdin=run.PIPE, wait=False, ) # XXX: add_daemon() doesn't let us wait until radosgw finishes startup for client in clients: endpoint = ctx.rgw.role_endpoints[client] url = endpoint.url() log.info( 'Polling {client} until it starts accepting connections on {url}'. format(client=client, url=url)) (remote, ) = ctx.cluster.only(client).remotes.keys() wait_for_radosgw(url, remote) try: yield finally: for client in clients: cluster_name, daemon_type, client_id = teuthology.split_role( client) client_with_id = daemon_type + '.' + client_id client_with_cluster = cluster_name + '.' + client_with_id ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).stop() ctx.cluster.only(client).run(args=[ 'rm', '-f', '{tdir}/rgw.opslog.{client}.sock'.format( tdir=testdir, client=client_with_cluster), ], ) ctx.cluster.only(client).run(args=['rm', '-f', token_path])
def task(ctx, config): """ Mount/unmount a ``ceph-fuse`` client. The config is optional and defaults to mounting on all clients. If a config is given, it is expected to be a list of clients to do this operation on. This lets you e.g. set up one client with ``ceph-fuse`` and another with ``kclient``. Example that mounts all clients:: tasks: - ceph: - ceph-fuse: - interactive: Example that uses both ``kclient`` and ``ceph-fuse``:: tasks: - ceph: - ceph-fuse: [client.0] - kclient: [client.1] - interactive: Example that enables valgrind:: tasks: - ceph: - ceph-fuse: client.0: valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] - interactive: :param ctx: Context :param config: Configuration """ log.info('Mounting ceph-fuse clients...') fuse_daemons = {} testdir = teuthology.get_testdir(ctx) if config is None: config = dict( ('client.{id}'.format(id=id_), None) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')) elif isinstance(config, list): config = dict((name, None) for name in config) overrides = ctx.config.get('overrides', {}) teuthology.deep_merge(config, overrides.get('ceph-fuse', {})) clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys())) for id_, remote in clients: client_config = config.get("client.%s" % id_) if client_config is None: client_config = {} log.info("Client client.%s config is %s" % (id_, client_config)) daemon_signal = 'kill' if client_config.get('coverage') or client_config.get( 'valgrind') is not None: daemon_signal = 'term' mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format( id=id_, remote=remote, mnt=mnt)) remote.run(args=[ 'mkdir', '--', mnt, ], ) run_cmd = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=testdir), 'daemon-helper', daemon_signal, ] run_cmd_tail = [ 'ceph-fuse', '-f', '--name', 'client.{id}'.format(id=id_), # TODO ceph-fuse doesn't understand dash dash '--', mnt, ] if client_config.get('valgrind') is not None: run_cmd = teuthology.get_valgrind_args( testdir, 'client.{id}'.format(id=id_), run_cmd, client_config.get('valgrind'), ) run_cmd.extend(run_cmd_tail) proc = remote.run( args=run_cmd, logger=log.getChild('ceph-fuse.{id}'.format(id=id_)), stdin=run.PIPE, wait=False, ) fuse_daemons[id_] = proc for id_, remote in clients: mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) teuthology.wait_until_fuse_mounted( remote=remote, fuse=fuse_daemons[id_], mountpoint=mnt, ) remote.run(args=[ 'sudo', 'chmod', '1777', '{tdir}/mnt.{id}'.format(tdir=testdir, id=id_) ], ) try: yield finally: log.info('Unmounting ceph-fuse clients...') for id_, remote in clients: mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) try: remote.run(args=[ 'sudo', 'fusermount', '-u', mnt, ], ) except run.CommandFailedError: log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=remote.name)) # abort the fuse mount, killing all hung processes remote.run(args=[ 'if', 'test', '-e', '/sys/fs/fuse/connections/*/abort', run.Raw(';'), 'then', 'echo', '1', run.Raw('>'), run.Raw('/sys/fs/fuse/connections/*/abort'), run.Raw(';'), 'fi', ], ) # make sure it's unmounted remote.run(args=[ 'sudo', 'umount', '-l', '-f', mnt, ], ) run.wait(fuse_daemons.itervalues()) for id_, remote in clients: mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) remote.run(args=[ 'rmdir', '--', mnt, ], )
def task(ctx, config): """ Mount/unmount a ``ceph-fuse`` client. The config is optional and defaults to mounting on all clients. If a config is given, it is expected to be a list of clients to do this operation on. This lets you e.g. set up one client with ``ceph-fuse`` and another with ``kclient``. Example that mounts all clients:: tasks: - ceph: - ceph-fuse: - interactive: Example that uses both ``kclient`` and ``ceph-fuse``:: tasks: - ceph: - ceph-fuse: [client.0] - kclient: [client.1] - interactive: Example that enables valgrind:: tasks: - ceph: - ceph-fuse: client.0: valgrind: --tool=memcheck - interactive: """ log.info("Mounting ceph-fuse clients...") fuse_daemons = {} if config is None: config = dict( ("client.{id}".format(id=id_), None) for id_ in teuthology.all_roles_of_type(ctx.cluster, "client") ) elif isinstance(config, list): config = dict((name, None) for name in config) clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys())) for id_, remote in clients: mnt = os.path.join("/tmp/cephtest", "mnt.{id}".format(id=id_)) log.info("Mounting ceph-fuse client.{id} at {remote} {mnt}...".format(id=id_, remote=remote, mnt=mnt)) client_config = config.get("client.%s" % id_) if client_config is None: client_config = {} log.info("Client client.%s config is %s" % (id_, client_config)) daemon_signal = "kill" if client_config.get("coverage") or client_config.get("valgrind") is not None: daemon_signal = "term" remote.run(args=["mkdir", "--", mnt]) run_cmd = [ "/tmp/cephtest/enable-coredump", "/tmp/cephtest/binary/usr/local/bin/ceph-coverage", "/tmp/cephtest/archive/coverage", "/tmp/cephtest/daemon-helper", daemon_signal, ] run_cmd_tail = [ "/tmp/cephtest/binary/usr/local/bin/ceph-fuse", "-f", "--name", "client.{id}".format(id=id_), "-c", "/tmp/cephtest/ceph.conf", # TODO ceph-fuse doesn't understand dash dash '--', mnt, ] if client_config.get("valgrind") is not None: run_cmd.extend(teuthology.get_valgrind_args("client.{id}".format(id=id_), client_config.get("valgrind"))) run_cmd.extend(run_cmd_tail) proc = remote.run( args=run_cmd, logger=log.getChild("ceph-fuse.{id}".format(id=id_)), stdin=run.PIPE, wait=False ) fuse_daemons[id_] = proc for id_, remote in clients: mnt = os.path.join("/tmp/cephtest", "mnt.{id}".format(id=id_)) teuthology.wait_until_fuse_mounted(remote=remote, fuse=fuse_daemons[id_], mountpoint=mnt) try: yield finally: log.info("Unmounting ceph-fuse clients...") for id_, remote in clients: mnt = os.path.join("/tmp/cephtest", "mnt.{id}".format(id=id_)) remote.run(args=["fusermount", "-u", mnt]) run.wait(fuse_daemons.itervalues()) for id_, remote in clients: mnt = os.path.join("/tmp/cephtest", "mnt.{id}".format(id=id_)) remote.run(args=["rmdir", "--", mnt])
def _mount(self, mount_path, mount_fs_name): log.info("Client client.%s config is %s" % (self.client_id, self.client_config)) daemon_signal = 'kill' if self.client_config.get('coverage') or self.client_config.get('valgrind') is not None: daemon_signal = 'term' log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format( id=self.client_id, remote=self.client_remote, mnt=self.mountpoint)) self.client_remote.run( args=[ 'mkdir', '--', self.mountpoint, ], ) run_cmd = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', '{tdir}/archive/coverage'.format(tdir=self.test_dir), 'daemon-helper', daemon_signal, ] fuse_cmd = ['ceph-fuse', "-f"] if mount_path is not None: fuse_cmd += ["--client_mountpoint={0}".format(mount_path)] if mount_fs_name is not None: fuse_cmd += ["--client_mds_namespace={0}".format(mount_fs_name)] fuse_cmd += [ '--name', 'client.{id}'.format(id=self.client_id), # TODO ceph-fuse doesn't understand dash dash '--', self.mountpoint, ] if self.client_config.get('valgrind') is not None: run_cmd = misc.get_valgrind_args( self.test_dir, 'client.{id}'.format(id=self.client_id), run_cmd, self.client_config.get('valgrind'), ) run_cmd.extend(fuse_cmd) def list_connections(): self.client_remote.run( args=["sudo", "mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"], check_status=False ) p = self.client_remote.run( args=["ls", "/sys/fs/fuse/connections"], stdout=StringIO(), check_status=False ) if p.exitstatus != 0: return [] ls_str = p.stdout.getvalue().strip() if ls_str: return [int(n) for n in ls_str.split("\n")] else: return [] # Before starting ceph-fuse process, note the contents of # /sys/fs/fuse/connections pre_mount_conns = list_connections() log.info("Pre-mount connections: {0}".format(pre_mount_conns)) proc = self.client_remote.run( args=run_cmd, logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)), stdin=run.PIPE, wait=False, ) self.fuse_daemon = proc # Wait for the connection reference to appear in /sys mount_wait = self.client_config.get('mount_wait', 0) if mount_wait > 0: log.info("Fuse mount waits {0} seconds before checking /sys/".format(mount_wait)) time.sleep(mount_wait) timeout = int(self.client_config.get('mount_timeout', 30)) waited = 0 post_mount_conns = list_connections() while len(post_mount_conns) <= len(pre_mount_conns): if self.fuse_daemon.finished: # Did mount fail? Raise the CommandFailedError instead of # hitting the "failed to populate /sys/" timeout self.fuse_daemon.wait() time.sleep(1) waited += 1 if waited > timeout: raise RuntimeError("Fuse mount failed to populate /sys/ after {0} seconds".format( waited )) else: post_mount_conns = list_connections() log.info("Post-mount connections: {0}".format(post_mount_conns)) # Record our fuse connection number so that we can use it when # forcing an unmount new_conns = list(set(post_mount_conns) - set(pre_mount_conns)) if len(new_conns) == 0: raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns)) elif len(new_conns) > 1: raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns)) else: self._fuse_conn = new_conns[0] status = self.admin_socket(['status']) self.id = status['id'] try: self.inst = status['inst_str'] self.addr = status['addr_str'] except KeyError as e: sessions = self.fs.rank_asok(['session', 'ls']) for s in sessions: if s['id'] == self.id: self.inst = s['inst'] self.addr = self.inst.split()[1] if self.inst is None: raise RuntimeError("cannot find client session")
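_mount() identifies the fuse connection it just created by diffing the numeric entries under /sys/fs/fuse/connections before and after starting ceph-fuse; exactly one new entry should appear. A self-contained sketch of that diff, taking the directory listings as plain strings instead of running ls on the remote:

def list_conn_ids(ls_output):
    """Parse 'ls /sys/fs/fuse/connections' output into a list of ints."""
    ls_output = ls_output.strip()
    return [int(n) for n in ls_output.split('\n')] if ls_output else []


def new_fuse_conn(pre_listing, post_listing):
    # the connection created by the mount is whatever id is new after mounting
    new_conns = list(set(list_conn_ids(post_listing)) -
                     set(list_conn_ids(pre_listing)))
    if len(new_conns) == 0:
        raise RuntimeError('no new fuse connection appeared')
    if len(new_conns) > 1:
        raise RuntimeError('ambiguous new fuse connections: %s' % new_conns)
    return new_conns[0]


assert new_fuse_conn('38\n40', '38\n40\n42') == 42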
def run_daemon(ctx, config, type_): log.info('Starting %s daemons...' % type_) testdir = teuthology.get_testdir(ctx) daemons = ctx.cluster.only(teuthology.is_type(type_)) coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) daemon_signal = 'kill' if config.get('coverage') or config.get('valgrind') is not None: daemon_signal = 'term' num_active = 0 for remote, roles_for_host in daemons.remotes.iteritems(): for id_ in teuthology.roles_of_type(roles_for_host, type_): name = '%s.%s' % (type_, id_) if not (id_.endswith('-s')) and (id_.find('-s-') == -1): num_active += 1 run_cmd = [ 'sudo', 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'daemon-helper', daemon_signal, ] run_cmd_tail = [ 'ceph-%s' % (type_), '-f', '-i', id_] if type_ in config.get('cpu_profile', []): profile_path = '/var/log/ceph/profiling-logger/%s.%s.prof' % (type_, id_) run_cmd.extend([ 'env', 'CPUPROFILE=%s' % profile_path ]) if config.get('valgrind') is not None: valgrind_args = None if type_ in config['valgrind']: valgrind_args = config['valgrind'][type_] if name in config['valgrind']: valgrind_args = config['valgrind'][name] run_cmd = teuthology.get_valgrind_args(testdir, name, run_cmd, valgrind_args) run_cmd.extend(run_cmd_tail) ctx.daemons.add_daemon(remote, type_, id_, args=run_cmd, logger=log.getChild(name), stdin=run.PIPE, wait=False, ) if type_ == 'mds': firstmon = teuthology.get_first_mon(ctx, config) (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() mon0_remote.run(args=[ 'adjust-ulimits', 'ceph-coverage', coverage_dir, 'ceph', 'mds', 'set_max_mds', str(num_active)]) try: yield finally: teuthology.stop_daemons_of_type(ctx, type_)
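This run_daemon() counts active MDS daemons for 'ceph mds set_max_mds' by skipping ids that look like standbys, i.e. ids ending in '-s' or containing '-s-'. A tiny illustrative helper mirroring that filter:

def count_active_mds(mds_ids):
    # ids ending in '-s' or containing '-s-' are standbys and not counted
    return sum(1 for id_ in mds_ids
               if not id_.endswith('-s') and id_.find('-s-') == -1)


assert count_active_mds(['a', 'b', 'a-s', 'b-s-0']) == 2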