def kill_run(run_name, archive_base=None, owner=None, machine_type=None,
             preserve_queue=False):
    """Kill an entire run: dequeue its jobs, stop its processes, and nuke
    its targets.

    :param run_name: name of the run to kill
    :param archive_base: base directory of run archives; when the run has
        an archive directory, machine type and owner are discovered from it
    :param owner: owner of the run's locked machines (overridden by the
        value found in the run archive, when present)
    :param machine_type: machine type whose queue holds the run's jobs;
        required when the run has no archive directory yet
    :param preserve_queue: when True, skip removing queued jobs and only
        kill processes / nuke targets
    :raises RuntimeError: if the run is entirely enqueued and no
        machine_type was supplied
    """
    run_info = {}
    serializer = report.ResultsSerializer(archive_base)
    if archive_base:
        run_archive_dir = os.path.join(archive_base, run_name)
        if os.path.isdir(run_archive_dir):
            run_info = find_run_info(serializer, run_name)
            if 'machine_type' in run_info:
                machine_type = run_info['machine_type']
                owner = run_info['owner']
            else:
                # Fix: log.warn() is a deprecated alias (removed in Python
                # 3.13); use log.warning(), consistent with the rest of the
                # file.
                log.warning("The run info does not have machine type: %s"
                            % run_info)
                log.warning("Run archive used: %s" % run_archive_dir)
            log.info("Using machine type '%s' and owner '%s'" %
                     (machine_type, owner))
        elif machine_type is None:
            raise RuntimeError("The run is still entirely enqueued; " +
                               "you must also pass --machine-type")

    if not preserve_queue:
        remove_beanstalk_jobs(run_name, machine_type)
        remove_paddles_jobs(run_name)
    kill_processes(run_name, run_info.get('pids'))
    if owner is not None:
        targets = find_targets(run_name, owner)
        nuke_targets(targets, owner)
def unlock_targets(job_config):
    """Unlock or nuke a finished job's machines.

    A machine is considered only when it is still locked and its lock
    description matches the job's archive path (i.e. this job holds the
    lock).  Passed jobs — or failed jobs with ``unlock_on_failure`` set
    and ``nuke-on-error`` unset — get their machines unlocked; failed
    jobs with ``nuke-on-error`` set get them nuked instead.
    """
    serializer = report.ResultsSerializer(teuth_config.archive_base)
    job_info = serializer.job_info(job_config['name'], job_config['job_id'])
    statuses = query.get_statuses(job_info['targets'].keys())
    # only unlock/nuke targets if locked and description matches
    owned = []
    for machine_status in statuses:
        if not machine_status['locked']:
            continue
        short = shortname(machine_status['name'])
        desc = machine_status['description']
        if desc != job_info['archive_path']:
            log.warning(
                "Was going to unlock %s but it was locked by another job: %s",
                short, desc)
            continue
        owned.append(short)
    if not owned:
        return
    job_status = get_status(job_info)
    passed = job_status == 'pass'
    nuke_on_error = job_config.get('nuke-on-error', False)
    if passed or (job_config.get('unlock_on_failure', False)
                  and not nuke_on_error):
        log.info('Unlocking machines...')
        fake_ctx = create_fake_context(job_config)
        for short in owned:
            teuthology.lock.ops.unlock_one(fake_ctx, short,
                                           job_info['owner'],
                                           job_info['archive_path'])
    if not passed and nuke_on_error:
        log.info('Nuking machines...')
        fake_ctx = create_fake_context(job_config)
        nuke(fake_ctx, True)
def kill_job(run_name, job_id, archive_base=None, owner=None):
    """Kill a single job: stop its process and nuke its targets.

    :param run_name: name of the run the job belongs to
    :param job_id: id of the job to kill
    :param archive_base: base directory of run archives
    :param owner: owner of the job's machines; discovered from the job
        info when not given
    :raises RuntimeError: when no owner was passed and none could be
        discovered from the job info
    """
    serializer = report.ResultsSerializer(archive_base)
    job_info = serializer.job_info(run_name, job_id)
    if not owner:
        # EAFP: take the owner from the job info, or fail loudly.
        try:
            owner = job_info['owner']
        except KeyError:
            raise RuntimeError(
                "I could not figure out the owner of the requested job. "
                "Please pass --owner <owner>.")
    kill_processes(run_name, [job_info.get('pid')])
    targets = {'targets': job_info.get('targets', {})}
    nuke_targets(targets, owner)
def transfer_archives(run_name, job_id, archive_base, job_config):
    """Compress and upload a job's archived logs, if it has any.

    The special log type 'init' is uploaded with an empty type name.
    """
    serializer = report.ResultsSerializer(archive_base)
    job_info = serializer.job_info(run_name, job_id, simple=True)
    if 'archive' not in job_info:
        log.info('No archives to transfer.')
        return
    ctx = create_fake_context(job_config)
    add_remotes(ctx, job_config)
    for log_type, log_path in job_info['archive'].items():
        effective_type = '' if log_type == 'init' else log_type
        compress_logs(ctx, log_path)
        archive_logs(ctx, log_path, effective_type)
def kill_job(run_name, job_id, archive_base=None, owner=None, save_logs=False):
    """Kill a single job: stop its process and nuke its targets.

    :param run_name: name of the run the job belongs to
    :param job_id: id of the job to kill
    :param archive_base: base directory of run archives
    :param owner: owner of the job's machines; discovered from the job
        info when not given
    :param save_logs: passed through to nuke_targets
    :raises RuntimeError: when no owner was passed and none could be
        discovered from the job info
    """
    serializer = report.ResultsSerializer(archive_base)
    job_info = serializer.job_info(run_name, job_id)
    if not owner:
        if 'owner' in job_info:
            owner = job_info['owner']
        else:
            raise RuntimeError(
                "I could not figure out the owner of the requested job. "
                "Please pass --owner <owner>.")
    kill_processes(run_name, [job_info.get('pid')])
    # Because targets can be missing for some cases, for example, when all
    # the necessary nodes ain't locked yet, we do not use job_info to get
    # them, but use find_targets():
    nuke_targets(find_targets(run_name, owner, job_id), owner, save_logs)
def kill_run(run_name, archive_base=None, owner=None, machine_type=None,
             preserve_queue=False):
    """Kill an entire run: dequeue its jobs, stop its processes, and nuke
    its targets.

    Machine type and owner are discovered from the run archive when it
    exists; otherwise paddles is consulted for the machine type.

    :param run_name: name of the run to kill
    :param archive_base: base directory of run archives
    :param owner: owner of the run's locked machines
    :param machine_type: machine type whose queue holds the run's jobs
    :param preserve_queue: when True, skip removing queued jobs
    :raises RuntimeError: when no machine type can be determined
    """
    info = {}
    serializer = report.ResultsSerializer(archive_base)
    if archive_base:
        run_archive_dir = os.path.join(archive_base, run_name)
        if os.path.isdir(run_archive_dir):
            info = find_run_info(serializer, run_name)
            if 'machine_type' not in info:
                log.warning("The run info does not have machine type: %s"
                            % info)
                log.warning("Run archive used: %s" % run_archive_dir)
            else:
                machine_type = info['machine_type']
                owner = info['owner']
            log.info("Using machine type '%s' and owner '%s'" %
                     (machine_type, owner))
        elif machine_type is None:
            # no jobs found in archive and no machine type specified,
            # so we try paddles to see if there is anything scheduled
            info = report.ResultsReporter().get_run(run_name)
            machine_type = info.get('machine_type', None)
            if not machine_type:
                raise RuntimeError(
                    f"Cannot find machine type for the run {run_name}; " +
                    "you must also pass --machine-type")
            log.info(
                f"Using machine type '{machine_type}' received from paddles."
            )

    if not preserve_queue:
        remove_beanstalk_jobs(run_name, machine_type)
        remove_paddles_jobs(run_name)
    kill_processes(run_name, info.get('pids'))
    if owner is not None:
        nuke_targets(find_targets(run_name, owner), owner)
def unlock_targets(job_config):
    """Unlock or nuke a finished job's locked machines.

    Passed jobs — or failed jobs with ``unlock_on_failure`` set and
    ``nuke-on-error`` unset — get their machines unlocked; failed jobs
    with ``nuke-on-error`` set get them nuked instead.
    """
    serializer = report.ResultsSerializer(teuth_config.archive_base)
    job_info = serializer.job_info(job_config['name'], job_config['job_id'])
    statuses = query.get_statuses(job_info['targets'].keys())
    # only unlock/nuke targets if locked in the first place
    locked_names = []
    for status in statuses:
        if status['locked']:
            locked_names.append(shortname(status['name']))
    if not locked_names:
        return
    job_status = get_status(job_info)
    nuke_on_error = job_config.get('nuke-on-error', False)
    if job_status == 'pass' or \
            (job_config.get('unlock_on_failure', False)
             and not nuke_on_error):
        log.info('Unlocking machines...')
        ctx = create_fake_context(job_config)
        for name in locked_names:
            teuthology.lock.ops.unlock_one(ctx, name, job_info['owner'],
                                           job_info['archive_path'])
    if job_status != 'pass' and nuke_on_error:
        log.info('Nuking machines...')
        ctx = create_fake_context(job_config)
        nuke(ctx, True)