def begin(self):
    super(CephFSMirror, self).begin()
    testdir = misc.get_testdir(self.ctx)

    args = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'daemon-helper',
        'term',
        ]

    if 'valgrind' in self.config:
        args = get_valgrind_args(
            testdir, 'cephfs-mirror-{id}'.format(id=self.client),
            args, self.config.get('valgrind'))

    args.extend([
        'cephfs-mirror',
        '--cluster', self.cluster_name,
        '--id', self.client_id,
        ])

    self.ctx.daemons.add_daemon(
        self.remote, 'cephfs-mirror', self.client,
        args=args,
        logger=self.log.getChild(self.client),
        stdin=run.PIPE,
        wait=False,
        )
def _add_valgrind_args(self, mount_cmd):
    if self.client_config.get('valgrind') is not None:
        mount_cmd = get_valgrind_args(
            self.test_dir,
            'client.{id}'.format(id=self.client_id),
            mount_cmd,
            self.client_config.get('valgrind'),
            cd=False)

    return mount_cmd
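# All of these call sites share one pattern: build the base argv for the
# daemon, then let get_valgrind_args() prepend a valgrind invocation when the
# per-client config carries a 'valgrind' entry. The sketch below is only a
# rough, hypothetical approximation of that wrapper (the real teuthology
# helper adds more flags, e.g. suppression files and XML output);
# wrap_with_valgrind and the example paths are made up for illustration.
def wrap_with_valgrind(testdir, name, args, valgrind_cfg):
    if not valgrind_cfg:
        # no valgrind requested: return the command untouched
        return list(args)
    return [
        'valgrind',
        '--log-file={tdir}/archive/log/valgrind/{name}.log'.format(
            tdir=testdir, name=name),
    ] + list(valgrind_cfg) + list(args)   # per-client flags, then the original argv

# Example: wrapping a mount prefix the way _add_valgrind_args() does above.
cmd = wrap_with_valgrind(
    '/home/ubuntu/cephtest', 'client.0',
    ['daemon-helper', 'term', 'ceph-fuse', '-f', '/mnt/mnt.0'],
    ['--tool=memcheck'])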
def begin(self):
    super(RBDMirror, self).begin()
    testdir = misc.get_testdir(self.ctx)
    daemon_signal = 'kill'
    if 'coverage' in self.config or 'valgrind' in self.config or \
            self.config.get('thrash', False):
        daemon_signal = 'term'

    args = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'daemon-helper',
        daemon_signal,
        ]

    if 'valgrind' in self.config:
        args = get_valgrind_args(
            testdir,
            'rbd-mirror-{id}'.format(id=self.client),
            args,
            self.config.get('valgrind'))

    args.extend([
        'rbd-mirror', '--foreground',
        '--cluster', self.cluster_name,
        '--id', self.client_id,
        ])

    self.ctx.daemons.add_daemon(
        self.remote, 'rbd-mirror', self.client,
        cluster=self.cluster_name,
        args=args,
        logger=self.log.getChild(self.client),
        stdin=run.PIPE,
        wait=False,
        )
def _mount(self, mntopts, check_status):
    log.info("Client client.%s config is %s" %
             (self.client_id, self.client_config))

    daemon_signal = 'kill'
    if self.client_config.get('coverage') or \
            self.client_config.get('valgrind') is not None:
        daemon_signal = 'term'

    # Use 0000 mode to prevent undesired modifications to the mountpoint on
    # the local file system.
    script = f'mkdir -m 0000 -p -v {self.hostfs_mntpt}'.split()
    stderr = StringIO()
    try:
        # Capture the command's stderr so the 'file exists' check below can
        # actually inspect it.
        self.client_remote.run(args=script, timeout=(15 * 60),
                               stderr=stderr)
    except CommandFailedError:
        if 'file exists' not in stderr.getvalue().lower():
            raise

    run_cmd = [
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=self.test_dir),
        'daemon-helper',
        daemon_signal,
    ]

    fuse_cmd = [
        'ceph-fuse', "-f",
        "--admin-socket", "/var/run/ceph/$cluster-$name.$pid.asok",
    ]
    if self.client_id is not None:
        fuse_cmd += ['--id', self.client_id]
    if self.client_keyring_path and self.client_id is not None:
        fuse_cmd += ['-k', self.client_keyring_path]
    if self.cephfs_mntpt is not None:
        fuse_cmd += ["--client_mountpoint=" + self.cephfs_mntpt]
    if self.cephfs_name is not None:
        fuse_cmd += ["--client_fs=" + self.cephfs_name]
    if mntopts:
        fuse_cmd += mntopts
    fuse_cmd.append(self.hostfs_mntpt)

    if self.client_config.get('valgrind') is not None:
        run_cmd = get_valgrind_args(
            self.test_dir,
            'client.{id}'.format(id=self.client_id),
            run_cmd,
            self.client_config.get('valgrind'),
            cd=False)

    netns_prefix = ['sudo', 'nsenter',
                    '--net=/var/run/netns/{0}'.format(self.netns_name)]
    run_cmd = netns_prefix + run_cmd

    run_cmd.extend(fuse_cmd)

    def list_connections():
        conn_dir = "/sys/fs/fuse/connections"

        self.client_remote.run(args=['sudo', 'modprobe', 'fuse'],
                               check_status=False)
        self.client_remote.run(
            args=["sudo", "mount", "-t", "fusectl", conn_dir, conn_dir],
            check_status=False, timeout=(30))

        try:
            ls_str = self.client_remote.sh("ls " + conn_dir,
                                           stdout=StringIO(),
                                           timeout=(15 * 60)).strip()
        except CommandFailedError:
            return []

        if ls_str:
            return [int(n) for n in ls_str.split("\n")]
        else:
            return []

    # Before starting ceph-fuse process, note the contents of
    # /sys/fs/fuse/connections
    pre_mount_conns = list_connections()
    log.info("Pre-mount connections: {0}".format(pre_mount_conns))

    mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO()
    self.fuse_daemon = self.client_remote.run(
        args=run_cmd,
        logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)),
        stdin=run.PIPE,
        stdout=mountcmd_stdout,
        stderr=mountcmd_stderr,
        wait=False)

    # Wait for the connection reference to appear in /sys
    mount_wait = self.client_config.get('mount_wait', 0)
    if mount_wait > 0:
        log.info("Fuse mount waits {0} seconds before checking /sys/".format(
            mount_wait))
        time.sleep(mount_wait)
    timeout = int(self.client_config.get('mount_timeout', 30))
    waited = 0

    post_mount_conns = list_connections()
    while len(post_mount_conns) <= len(pre_mount_conns):
        if self.fuse_daemon.finished:
            # Did mount fail?  Raise the CommandFailedError instead of
            # hitting the "failed to populate /sys/" timeout
            try:
                self.fuse_daemon.wait()
            except CommandFailedError as e:
                log.info('mount command failed.')
                if check_status:
                    raise
                else:
                    return (e, mountcmd_stdout.getvalue(),
                            mountcmd_stderr.getvalue())
        time.sleep(1)
        waited += 1
        if waited > timeout:
            raise RuntimeError(
                "Fuse mount failed to populate /sys/ after {} "
                "seconds".format(waited))
        else:
            post_mount_conns = list_connections()

    log.info("Post-mount connections: {0}".format(post_mount_conns))

    # Record our fuse connection number so that we can use it when
    # forcing an unmount
    new_conns = list(set(post_mount_conns) - set(pre_mount_conns))
    if len(new_conns) == 0:
        raise RuntimeError(
            "New fuse connection directory not found ({0})".format(new_conns))
    elif len(new_conns) > 1:
        raise RuntimeError(
            "Unexpectedly numerous fuse connections {0}".format(new_conns))
    else:
        self._fuse_conn = new_conns[0]

    self.gather_mount_info()

    self.mounted = True
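# The bookkeeping at the end of _mount() boils down to a set difference:
# whichever ID appears under /sys/fs/fuse/connections after the mount but was
# absent before must belong to the new ceph-fuse instance. A toy illustration
# with made-up connection numbers (the real values are directory names under
# /sys/fs/fuse/connections):
pre_mount_conns = [38, 39]
post_mount_conns = [38, 39, 40]

new_conns = list(set(post_mount_conns) - set(pre_mount_conns))
if len(new_conns) != 1:
    raise RuntimeError("expected exactly one new fuse connection: %r" % new_conns)
fuse_conn = new_conns[0]   # 40; recorded so a later forced unmount can target it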
def _run_one_client(ctx, config, role):
    """Spawned task that runs the client"""
    krbd = config.get('krbd', False)
    nbd = config.get('nbd', False)
    testdir = teuthology.get_testdir(ctx)
    (remote,) = ctx.cluster.only(role).remotes.keys()

    args = []
    if krbd or nbd:
        args.append('sudo')  # rbd(-nbd) map/unmap need privileges
    args.extend([
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir)
    ])

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('rbd_fsx', {}))

    if config.get('valgrind'):
        args = get_valgrind_args(
            testdir,
            'fsx_{id}'.format(id=role),
            args,
            config.get('valgrind'))

    cluster_name, type_, client_id = teuthology.split_role(role)
    if type_ != 'client':
        msg = 'client role ({0}) must be a client'.format(role)
        raise ConfigError(msg)

    args.extend([
        'ceph_test_librbd_fsx',
        '--cluster', cluster_name,
        '--id', client_id,
        '-d',  # debug output for all operations
        '-W', '-R',  # mmap doesn't work with rbd
        '-p', str(config.get('progress_interval', 100)),  # show progress
        '-P', '{tdir}/archive'.format(tdir=testdir),
        '-r', str(config.get('readbdy', 1)),
        '-w', str(config.get('writebdy', 1)),
        '-t', str(config.get('truncbdy', 1)),
        '-h', str(config.get('holebdy', 1)),
        '-l', str(config.get('size', 250000000)),
        '-S', str(config.get('seed', 0)),
        '-N', str(config.get('ops', 1000)),
    ])
    if krbd:
        args.append('-K')  # -K enables krbd mode
    if nbd:
        args.append('-M')  # -M enables nbd mode
    if config.get('direct_io', False):
        args.append('-Z')  # -Z use direct IO
    if not config.get('randomized_striping', True):
        args.append('-U')  # -U disables randomized striping
    if not config.get('punch_holes', True):
        args.append('-H')  # -H disables discard ops
    if config.get('deep_copy', False):
        args.append('-g')  # -g deep copy instead of clone
    if config.get('journal_replay', False):
        args.append('-j')  # -j replay all IO events from journal
    if config.get('keep_images', False):
        args.append('-k')  # -k keep images on success
    args.extend([
        config.get('pool_name', 'pool_{pool}'.format(pool=role)),
        'image_{image}'.format(image=role),
    ])

    remote.run(args=args)
def start_rgw(ctx, config, clients):
    """
    Start rgw on remote sites.
    """
    log.info('Starting rgw...')
    testdir = teuthology.get_testdir(ctx)
    for client in clients:
        (remote,) = ctx.cluster.only(client).remotes.keys()
        cluster_name, daemon_type, client_id = teuthology.split_role(client)
        client_with_id = daemon_type + '.' + client_id
        client_with_cluster = cluster_name + '.' + client_with_id

        client_config = config.get(client)
        if client_config is None:
            client_config = {}
        log.info("rgw %s config is %s", client, client_config)
        cmd_prefix = [
            'sudo',
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=testdir),
            'daemon-helper',
            'term',
            ]

        rgw_cmd = ['radosgw']

        log.info("Using %s as radosgw frontend", ctx.rgw.frontend)

        endpoint = ctx.rgw.role_endpoints[client]
        frontends = ctx.rgw.frontend
        frontend_prefix = client_config.get('frontend_prefix', None)
        if frontend_prefix:
            frontends += ' prefix={pfx}'.format(pfx=frontend_prefix)

        if endpoint.cert:
            # add the ssl certificate path
            frontends += ' ssl_certificate={}'.format(endpoint.cert.certificate)
            if ctx.rgw.frontend == 'civetweb':
                frontends += ' port={}s'.format(endpoint.port)
            else:
                frontends += ' ssl_port={}'.format(endpoint.port)
        else:
            frontends += ' port={}'.format(endpoint.port)

        rgw_cmd.extend([
            '--rgw-frontends', frontends,
            '-n', client_with_id,
            '--cluster', cluster_name,
            '-k', '/etc/ceph/{client_with_cluster}.keyring'.format(
                client_with_cluster=client_with_cluster),
            '--log-file',
            '/var/log/ceph/rgw.{client_with_cluster}.log'.format(
                client_with_cluster=client_with_cluster),
            '--rgw_ops_log_socket_path',
            '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format(
                tdir=testdir, client_with_cluster=client_with_cluster),
        ])

        keystone_role = client_config.get('use-keystone-role', None)
        if keystone_role is not None:
            if not ctx.keystone:
                raise ConfigError('rgw must run after the keystone task')
            url = 'http://{host}:{port}/v1/KEY_$(tenant_id)s'.format(
                host=endpoint.hostname, port=endpoint.port)
            ctx.keystone.create_endpoint(ctx, keystone_role, 'swift', url)

            keystone_host, keystone_port = \
                ctx.keystone.public_endpoints[keystone_role]
            rgw_cmd.extend([
                '--rgw_keystone_url',
                'http://{khost}:{kport}'.format(khost=keystone_host,
                                                kport=keystone_port),
                ])

        if client_config.get('dns-name') is not None:
            rgw_cmd.extend(['--rgw-dns-name', endpoint.dns_name])
        if client_config.get('dns-s3website-name') is not None:
            rgw_cmd.extend(
                ['--rgw-dns-s3website-name', endpoint.website_dns_name])

        vault_role = client_config.get('use-vault-role', None)
        barbican_role = client_config.get('use-barbican-role', None)
        pykmip_role = client_config.get('use-pykmip-role', None)
        token_path = teuthology.get_testdir(ctx) + '/vault-token'

        if barbican_role is not None:
            if not hasattr(ctx, 'barbican'):
                raise ConfigError('rgw must run after the barbican task')

            barbican_host, barbican_port = \
                ctx.barbican.endpoints[barbican_role]
            log.info("Use barbican url=%s:%s", barbican_host, barbican_port)

            rgw_cmd.extend([
                '--rgw_barbican_url',
                'http://{bhost}:{bport}'.format(bhost=barbican_host,
                                                bport=barbican_port),
                ])
        elif vault_role is not None:
            if not ctx.vault.root_token:
                raise ConfigError('vault: no "root_token" specified')
            # create token on file
            ctx.rgw.vault_role = vault_role
            ctx.cluster.only(client).run(args=[
                'echo', '-n', ctx.vault.root_token, run.Raw('>'), token_path
            ])
            log.info("Token file content")
            ctx.cluster.only(client).run(args=['cat', token_path])
            log.info("Restrict access to token file")
            ctx.cluster.only(client).run(args=['chmod', '600', token_path])
            ctx.cluster.only(client).run(
                args=['sudo', 'chown', 'ceph', token_path])

            rgw_cmd.extend([
                '--rgw_crypt_vault_addr',
                "{}:{}".format(*ctx.vault.endpoints[vault_role]),
                '--rgw_crypt_vault_token_file', token_path
            ])
        elif pykmip_role is not None:
            if not hasattr(ctx, 'pykmip'):
                raise ConfigError('rgw must run after the pykmip task')
            ctx.rgw.pykmip_role = pykmip_role
            rgw_cmd.extend([
                '--rgw_crypt_kmip_addr',
                "{}:{}".format(*ctx.pykmip.endpoints[pykmip_role]),
            ])

        rgw_cmd.extend([
            '--foreground',
            run.Raw('|'),
            'sudo',
            'tee',
            '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format(
                client_with_cluster=client_with_cluster),
            run.Raw('2>&1'),
            ])

        if client_config.get('valgrind'):
            cmd_prefix = get_valgrind_args(
                testdir,
                client_with_cluster,
                cmd_prefix,
                client_config.get('valgrind'),
                # see https://github.com/ceph/teuthology/pull/1600
                exit_on_first_error=False)

        run_cmd = list(cmd_prefix)
        run_cmd.extend(rgw_cmd)

        ctx.daemons.add_daemon(
            remote, 'rgw', client_with_id,
            cluster=cluster_name,
            fsid=ctx.ceph[cluster_name].fsid,
            args=run_cmd,
            logger=log.getChild(client),
            stdin=run.PIPE,
            wait=False,
            )

    # XXX: add_daemon() doesn't let us wait until radosgw finishes startup
    for client in clients:
        endpoint = ctx.rgw.role_endpoints[client]
        url = endpoint.url()
        log.info('Polling {client} until it starts accepting connections on '
                 '{url}'.format(client=client, url=url))
        (remote,) = ctx.cluster.only(client).remotes.keys()
        wait_for_radosgw(url, remote)

    try:
        yield
    finally:
        for client in clients:
            cluster_name, daemon_type, client_id = teuthology.split_role(client)
            client_with_id = daemon_type + '.' + client_id
            client_with_cluster = cluster_name + '.' + client_with_id
            ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).stop()
            ctx.cluster.only(client).run(
                args=[
                    'rm',
                    '-f',
                    '{tdir}/rgw.opslog.{client}.sock'.format(
                        tdir=testdir, client=client_with_cluster),
                    ],
                )
            ctx.cluster.only(client).run(args=['rm', '-f', token_path])
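# start_rgw() reads its per-client settings from the task config keyed by
# client name. The dict below is a hypothetical example showing the keys the
# loop above looks up; the flag values are illustrative, not defaults:
config = {
    'client.0': {
        'valgrind': ['--tool=memcheck'],   # forwarded to get_valgrind_args()
        'frontend_prefix': None,           # optional frontend prefix
        'use-vault-role': None,            # set to a vault role name to point rgw at the vault task
    },
}
client_config = config.get('client.0') or {}
assert client_config.get('valgrind') == ['--tool=memcheck']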