def record_state_duration(
        job_or_task_id, hostname, status, duration_secs,
        type_str=DEFAULT_KEY, is_special_task=False):
    """Report how long a job or a special task spent in one state.

    Nothing is posted, and an error is logged instead, when the id, the
    hostname or the status is missing (falsy).

    @param job_or_task_id: Integer, representing a job id or a special task
                           id.
    @param hostname: String, representing a hostname.
    @param status: One of the enum values of job_overhead.STATUS.
    @param duration_secs: Duration of the job/task in secs.
    @param type_str: The elastic search type string to be used when sending
                     data to metadata db.
    @param is_special_task: True/False, whether this is a special task.
    """
    if not (job_or_task_id and hostname and status):
        logging.error(
                'record_state_duration failed: job_or_task_id=%s, '
                'hostname=%s, status=%s',
                job_or_task_id, hostname, status)
        return

    # Special tasks and regular jobs are stored under different id keys.
    if is_special_task:
        id_key = 'task_id'
    else:
        id_key = 'job_id'

    payload = {
        id_key: int(job_or_task_id),
        'hostname': hostname,
        'status': status,
        'duration': duration_secs,
    }
    autotest_es.post(type_str=type_str, metadata=payload)
def correct_results_folder_permission(results):
    """Give the results folder to the user and group running this process.

    For tests running with server-side packaging, the results folder has the
    owner of root. This must be changed to the user running the autoserv
    process, so parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can
    be unprivileged container.

    A failing command is reported to the metadata db and then re-raised.

    @param results: Path to the results folder. Nothing is done when it is
                    empty or None.
    """
    if not results:
        return

    # Owner first, then group; each id is looked up right before its command.
    ownership_commands = (
        ('sudo -n chown -R %s "%s"', os.getuid),
        ('sudo -n chgrp -R %s "%s"', os.getgid),
    )
    try:
        for template, current_id in ownership_commands:
            utils.run(template % (current_id(), results))
    except error.CmdError as e:
        failure_info = {
            'error': str(e),
            'result_folder': results,
            'drone': socket.gethostname(),
        }
        autotest_es.post(use_http=True,
                         type_str='correct_results_folder_failure',
                         metadata=failure_info)
        raise
def create_from_base(self, name, disable_snapshot_clone=False,
                     force_cleanup=False):
    """Create a container from the base container.

    If a snapshot clone is attempted and fails, the clone is retried once
    without snapshot (forcing cleanup of the half-created container) and the
    successful retry is reported to the metadata db.

    @param name: Name of the container.
    @param disable_snapshot_clone: Set to True to force to clone without
            using snapshot clone even if the host supports that.
    @param force_cleanup: Force to cleanup existing container.

    @return: A Container object for the created container.

    @raise ContainerError: If the container already exist.
    @raise error.CmdError: If lxc-clone call failed for any reason.
    """
    if self.exist(name) and not force_cleanup:
        raise error.ContainerError('Container %s already exists.' % name)

    # Cleanup existing container with the given name.
    container_folder = os.path.join(self.container_path, name)
    if lxc_utils.path_exists(container_folder) and force_cleanup:
        container = Container(self.container_path, {'name': name})
        try:
            container.destroy()
        except error.CmdError as e:
            # The container could be created in an incomplete state. Delete
            # the container folder instead.
            # logging.warning is used because logging.warn is a deprecated
            # alias.
            logging.warning('Failed to destroy container %s, error: %s',
                            name, e)
            utils.run('sudo rm -rf "%s"' % container_folder)

    use_snapshot = SUPPORT_SNAPSHOT_CLONE and not disable_snapshot_clone
    snapshot = '-s' if use_snapshot else ''
    # overlayfs is the default clone backend storage. However it is not
    # supported in Ganeti yet. Use aufs as the alternative.
    aufs = '-B aufs' if utils.is_vm() and use_snapshot else ''
    cmd = ('sudo lxc-clone -p %s -P %s %s' %
           (self.container_path, self.container_path,
            ' '.join([BASE, name, snapshot, aufs])))
    try:
        utils.run(cmd)
        return self.get(name)
    except error.CmdError:
        if not use_snapshot:
            raise
        # Snapshot clone failed, retry clone without snapshot. The retry
        # won't hit the code here and cause an infinite loop as
        # disable_snapshot_clone is set to True.
        container = self.create_from_base(
                name, disable_snapshot_clone=True, force_cleanup=True)
        # Report metadata about retry success.
        autotest_es.post(use_http=True,
                         type_str=CONTAINER_CREATE_RETRY_METADB_TYPE,
                         metadata={'drone': socket.gethostname(),
                                   'name': name,
                                   'success': True})
        return container
def delete(board):
    """Remove the stable version record of a board and report the change.

    @param board: Name of the board.
    """
    record = models.StableVersion.objects.get(board=board)
    record.delete()

    # The reported version is whatever get() returns once the record is gone.
    report = {'board': board, 'version': get()}
    autotest_es.post(type_str=_STABLE_VERSION_TYPE, metadata=report)
def handle_sigterm(signum, frame):
    """Handle SIGTERM: clean up, then kill the whole process group.

    The handler reads `pid_file_manager`, `use_ssp`, `results`,
    `container_name` and `job_or_task_id`, none of which are defined in this
    function.
    NOTE(review): presumably these come from the enclosing scope - confirm.

    @param signum: Signal number passed to the handler; not used.
    @param frame: Stack frame passed to the handler; not used.
    """
    logging.debug('Received SIGTERM')
    if pid_file_manager:
        pid_file_manager.close_file(1, signal.SIGTERM)
    logging.debug('Finished writing to pid_file. Killing process.')

    # Update results folder's file permission. This needs to be done ASAP
    # before the parsing process tries to access the log.
    if use_ssp and results:
        correct_results_folder_permission(results)

    # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
    # This sleep allows the pending output to be logged before the kill
    # signal is sent.
    time.sleep(.1)
    if use_ssp:
        logging.debug(
                'Destroy container %s before aborting the autoserv '
                'process.', container_name)
        # Optimistically marked successful; flipped below on any failure.
        metadata = {
            'drone': socket.gethostname(),
            'job_id': job_or_task_id,
            'container_name': container_name,
            'action': 'abort',
            'success': True
        }
        try:
            bucket = lxc.ContainerBucket()
            container = bucket.get(container_name)
            if container:
                container.destroy()
            else:
                metadata['success'] = False
                metadata['error'] = 'container not found'
                logging.debug('Container %s is not found.', container_name)
        except:
            # The bare except is deliberate: nothing raised while destroying
            # the container may stop the abort below.
            metadata['success'] = False
            metadata['error'] = 'Exception: %s' % str(sys.exc_info())
            # Handle any exception so the autoserv process can be aborted.
            logging.exception('Failed to destroy container %s.',
                              container_name)
        autotest_es.post(use_http=True,
                         type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                         metadata=metadata)
        # Try to correct the result file permission again after the
        # container is destroyed, as the container might have created some
        # new files in the result folder.
        if results:
            correct_results_folder_permission(results)

    # SIGKILL the current process group, which includes this process.
    os.killpg(os.getpgrp(), signal.SIGKILL)
def func_cleanup_if_fail(*args, **kwargs):
    """Call `func`; if it raises, destroy the container and report failure.

    The first argument must be a ContainerBucket object, which can be used to
    retrieve the container object by name.

    `func` is not a parameter of this wrapper; it is a name resolved outside
    this function.
    NOTE(review): presumably the function being decorated - confirm.

    On failure the container named by the `name` argument is destroyed
    (unless the `skip_cleanup` argument is true), a failure record is posted
    to the metadata db, and the original exception is re-raised with its
    original traceback.

    @param args: arguments for function to be called.
    @param kwargs: keyword arguments for function to be called.

    @return: Whatever `func` returns.
    """
    bucket = args[0]
    name = utils.get_function_arg_value(func, 'name', args, kwargs)
    try:
        skip_cleanup = utils.get_function_arg_value(
                func, 'skip_cleanup', args, kwargs)
    except (KeyError, ValueError):
        # A lookup failure for skip_cleanup is treated as "do clean up".
        skip_cleanup = False
    try:
        return func(*args, **kwargs)
    except:
        # Cache the exception now: the nested try/except blocks below would
        # otherwise replace the "current" exception in Python 2.
        exc_info = sys.exc_info()
        try:
            container = bucket.get(name)
            if container and not skip_cleanup:
                container.destroy()
        except error.CmdError as e:
            # A failed cleanup is only logged so the original error is the
            # one that propagates.
            logging.error(e)

        try:
            job_id = utils.get_function_arg_value(
                    func, 'job_id', args, kwargs)
        except (KeyError, ValueError):
            job_id = ''
        metadata = {
            'drone': socket.gethostname(),
            'job_id': job_id,
            'success': False
        }
        # Record all args if job_id is not available.
        if not job_id:
            metadata['args'] = str(args)
            if kwargs:
                metadata.update(kwargs)
        autotest_es.post(use_http=True,
                         type_str=CONTAINER_CREATE_METADB_TYPE,
                         metadata=metadata)

        # Raise the cached exception with original backtrace.
        raise exc_info[0], exc_info[1], exc_info[2]
def set(version, board=DEFAULT):
    """Store the stable version of a board and report the new value.

    An existing record for the board is updated; if there is none, a new
    record is created.

    @param version: The new value of stable version for given board.
    @param board: Name of the board, default to value `DEFAULT`.
    """
    try:
        record = models.StableVersion.objects.get(board=board)
        record.version = version
        record.save()
    except django.core.exceptions.ObjectDoesNotExist:
        models.StableVersion.objects.create(board=board, version=version)

    report = {'board': board, 'version': version}
    autotest_es.post(type_str=_STABLE_VERSION_TYPE, metadata=report)
def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
    """Schedule a job to reboot the servo host.

    When we schedule a job, it will create a ServoHost object which will go
    through this entire flow of checking if a reboot is needed and trying to
    schedule it. There is probably a better approach to setting up a
    synchronized reboot but I'm coming up short on better ideas so I apologize
    for this circus show.

    A failure to create the job is logged and reported to the metadata db,
    but never propagated to the caller.

    @param dut_list: List of duts that need to be locked.
    @param afe: Instance of afe.
    @param force_reboot: Boolean to indicate if a forced reboot should be
                         scheduled or not.
    """
    # If we've already scheduled job on a dut, we're done here.
    if self._sync_job_scheduled_for_duts(dut_list, afe):
        return

    # Looks like we haven't scheduled a job yet.
    if force_reboot:
        test = _SERVO_HOST_FORCE_REBOOT_TEST_NAME
    else:
        test = _SERVO_HOST_REBOOT_TEST_NAME
    dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
    getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
    control_file = getter.get_control_file_contents_by_name(test)
    control_type = control_data.CONTROL_TYPE_NAMES.SERVER

    job_kwargs = {
        'control_file': control_file,
        'name': test,
        'control_type': control_type,
        'hosts': [dut],
    }
    try:
        afe.create_job(**job_kwargs)
    except Exception as e:
        # Sometimes creating the job will raise an exception. We'll log it
        # but we don't want to fail because of it.
        logging.exception('Scheduling reboot job failed: %s', e)
        failure_info = {
            'dut': dut,
            'servo_host': self.hostname,
            'error': str(e),
            'details': traceback.format_exc(),
        }
        # We want to track how often we fail here so we can justify
        # investing some effort into hardening up afe.create_job().
        autotest_es.post(use_http=True,
                         type_str='servohost_Reboot_schedule_fail',
                         metadata=failure_info)
def record_state(self, type_str, state, value):
    """Post one state change of this object to elasticsearch.

    If ES configured to use http, then we will time that http request.
    Otherwise, it uses UDP, so we will not need to time it.

    @param type_str: sets the _type field in elasticsearch db.
    @param state: string representing what state we are recording,
                  e.g. 'status'
    @param value: value of the state, e.g. 'verifying'
    """
    # Filled in this order on purpose: a later key wins if `state` happens to
    # collide with one of the fixed keys.
    payload = {}
    payload['time_changed'] = time.time()
    payload[state] = value
    payload['job_id'] = self.job_id
    if self.host:
        payload['hostname'] = self.host.hostname
    autotest_es.post(type_str=type_str, metadata=payload)
def collect_info():
    """Collect the labels of all valid hosts and report them to metaDB.

    @raise Exception: If the bulk upload of the host label records fails.
    """
    # time_index is to index all host labels collected together. It's
    # converted to int to make search faster.
    time_index = int(time.time())
    hosts = models.Host.objects.filter(invalid=False)

    data_list = [
        {'_type': _HOST_LABEL_TYPE,
         'hostname': host.hostname,
         'labels': [label.name for label in host.labels.all()],
         'time_index': time_index}
        for host in hosts
    ]
    uploaded = autotest_es.bulk_post(data_list, log_time_recorded=False)
    if not uploaded:
        raise Exception('Failed to upload host label info.')

    # After all host label information is logged, save the time stamp.
    autotest_es.post(use_http=True, type_str=_HOST_LABEL_TIME_INDEX_TYPE,
                     metadata={'time_index': time_index},
                     log_time_recorded=False)
    logging.info('Finished collecting host labels for %d hosts.', len(hosts))
def record_suite_runtime(suite_job_id, suite_name, board, build,
                         num_child_jobs, runtime_in_secs):
    """Post the runtime of one suite to the metadata db.

    @param suite_job_id: The job id of the suite for which we are going to
                         collect stats.
    @param suite_name: The suite name, e.g. 'bvt', 'dummy'.
    @param board: The target board for which the suite is run,
                  e.g., 'lumpy', 'link'.
    @param build: The build for which the suite is run,
                  e.g. 'lumpy-release/R35-5712.0.0'.
    @param num_child_jobs: Total number of child jobs of the suite.
    @param runtime_in_secs: Duration of the suite from the start to the end.
    """
    payload = dict(
        suite_job_id=suite_job_id,
        suite_name=suite_name,
        board=board,
        build=build,
        num_child_jobs=num_child_jobs,
        duration=runtime_in_secs,
    )
    autotest_es.post(type_str=SUITE_RUNTIME_KEY, metadata=payload)
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    The current command line (sys.argv) is rewritten so that host paths point
    at their in-container counterparts, then executed inside a freshly set up
    test container. The container is always destroyed afterwards and the
    outcome is reported to the metrics counter and the metadata db.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to
                    be stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raise Exception: Whatever container setup or the in-container run
                      raised; a FAIL entry is recorded on `job` first.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds
    # before CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the
    # results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # `results` may be None and the debug folder may not exist; both
        # count as "no log". Plain checks are used instead of a nested
        # try/except so that no secondary error can mask the real failure and
        # the bare `raise` below still re-raises it under Python 2.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_files = bool(debug_dir and os.path.isdir(debug_dir) and
                               os.listdir(debug_dir))
        if not has_debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        metrics.Counter(
            'chromeos/autotest/experimental/execute_job_in_ssp').increment(
                fields={'success': success})
        # metadata is uploaded separately so it can use http to upload.
        metadata = {'drone': socket.gethostname(),
                    'job_id': job_id,
                    'success': success}
        autotest_es.post(use_http=True,
                         type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                         metadata=metadata)
        test_container.destroy()
def main():
    """Main entrance.

    Parses every job directory selected by the command line options while
    holding a per-directory `.parse.lock` file lock. Any failure is recorded
    in the pid file (exit code 1), reported to the metadata db and re-raised;
    on success the pid file gets exit code 0 and, if requested, the parsing
    duration is recorded.
    """
    start_time = datetime.datetime.now()
    # Record the processed jobs so that
    # we can send the duration of parsing to metadata db.
    processed_jobs = set()

    options, args = parse_args()
    parse_options = _ParseOptions(options.reparse, options.mailit,
                                  options.dry_run, options.suite_report,
                                  options.datastore_creds,
                                  options.export_to_gcloud_path)
    results_dir = os.path.abspath(args[0])
    assert os.path.exists(results_dir)

    pid_file_manager = pidfile.PidFileManager("parser", results_dir)

    if options.write_pidfile:
        pid_file_manager.open_file()

    try:
        # build up the list of job dirs to parse
        if options.singledir:
            jobs_list = [results_dir]
        else:
            jobs_list = [os.path.join(results_dir, subdir)
                         for subdir in os.listdir(results_dir)]

        # build up the database
        db = tko_db.db(autocommit=False, host=options.db_host,
                       user=options.db_user, password=options.db_pass,
                       database=options.db_name)

        # parse all the jobs
        for path in jobs_list:
            lockfile = open(os.path.join(path, ".parse.lock"), "w")
            flags = fcntl.LOCK_EX
            if options.noblock:
                flags |= fcntl.LOCK_NB
            try:
                fcntl.flock(lockfile, flags)
            except IOError as e:
                # The lock was not acquired, so the handle is closed in every
                # case to avoid leaking it.
                lockfile.close()
                # lock is not available and nonblock has been requested
                if e.errno == errno.EWOULDBLOCK:
                    continue
                raise  # something unexpected happened
            try:
                new_jobs = parse_path(db, path, options.level, parse_options)
                processed_jobs.update(new_jobs)
            finally:
                fcntl.flock(lockfile, fcntl.LOCK_UN)
                lockfile.close()

    except Exception as e:
        pid_file_manager.close_file(1)

        metadata = {'results_dir': results_dir,
                    'error': str(e),
                    'details': traceback.format_exc()}
        autotest_es.post(use_http=True, type_str='parse_failure_final',
                         metadata=metadata)

        raise
    else:
        pid_file_manager.close_file(0)
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        if options.record_duration:
            record_parsing(processed_jobs, duration_secs)
def parse_one(db, jobname, path, parse_options):
    """Parse a single job. Optionally send email on failure.

    Returns early, without parsing, when the job is already in the database
    and reparse was not requested, or when no status file can be found. In
    dry-run mode nothing is written to the database, no mail is sent, and no
    suite report is generated.

    @param db: database object.
    @param jobname: the tag used to search for existing job in db,
                    e.g. '1234-chromeos-test/host1'
    @param path: The path to the results to be parsed.
    @param parse_options: _ParseOptions instance.

    @raise Exception: Whatever mailing or uploading the job to the tko db
                      raised; it is reported to the metadata db first.
    """
    reparse = parse_options.reparse
    mail_on_failure = parse_options.mail_on_failure
    dry_run = parse_options.dry_run
    suite_report = parse_options.suite_report
    datastore_creds = parse_options.datastore_creds
    export_to_gcloud_path = parse_options.export_to_gcloud_path

    tko_utils.dprint("\nScanning %s (%s)" % (jobname, path))
    old_job_idx = db.find_job(jobname)
    # old tests is a dict from tuple (test_name, subdir) to test_idx
    old_tests = {}
    if old_job_idx is not None:
        if not reparse:
            tko_utils.dprint("! Job is already parsed, done")
            return

        raw_old_tests = db.select("test_idx,subdir,test", "tko_tests",
                                  {"job_idx": old_job_idx})
        if raw_old_tests:
            old_tests = dict(((test, subdir), test_idx)
                             for test_idx, subdir, test in raw_old_tests)

    # look up the status version
    job_keyval = models.job.read_keyval(path)
    status_version = job_keyval.get("status_version", 0)

    # parse out the job
    parser = parser_lib.parser(status_version)
    job = parser.make_job(path)
    status_log = os.path.join(path, "status.log")
    if not os.path.exists(status_log):
        status_log = os.path.join(path, "status")
    if not os.path.exists(status_log):
        tko_utils.dprint("! Unable to parse job, no status file")
        return

    # parse the status logs
    tko_utils.dprint("+ Parsing dir=%s, jobname=%s" % (path, jobname))
    # Use a context manager so the status log handle is always closed.
    with open(status_log) as status_file:
        status_lines = status_file.readlines()
    parser.start(job)
    tests = parser.end(status_lines)

    # parser.end can return the same object multiple times, so filter out dups
    job.tests = []
    already_added = set()
    for test in tests:
        if test not in already_added:
            already_added.add(test)
            job.tests.append(test)

    # try and port test_idx over from the old tests, but if old tests stop
    # matching up with new ones just give up
    if reparse and old_job_idx is not None:
        job.index = old_job_idx
        for test in job.tests:
            test_idx = old_tests.pop((test.testname, test.subdir), None)
            if test_idx is not None:
                test.test_idx = test_idx
            else:
                tko_utils.dprint("! Reparse returned new test "
                                 "testname=%r subdir=%r" %
                                 (test.testname, test.subdir))
        if not dry_run:
            # Whatever is left in old_tests no longer exists in the reparsed
            # job, so its rows are removed.
            for test_idx in old_tests.values():
                where = {'test_idx': test_idx}
                db.delete('tko_iteration_result', where)
                db.delete('tko_iteration_perf_value', where)
                db.delete('tko_iteration_attributes', where)
                db.delete('tko_test_attributes', where)
                db.delete('tko_test_labels_tests', {'test_id': test_idx})
                db.delete('tko_tests', where)

    job.build = None
    job.board = None
    job.build_version = None
    job.suite = None
    if job.label:
        label_info = site_utils.parse_job_name(job.label)
        if label_info:
            job.build = label_info.get('build', None)
            job.build_version = label_info.get('build_version', None)
            job.board = label_info.get('board', None)
            job.suite = label_info.get('suite', None)

    # Upload job details to Sponge.
    if not dry_run:
        sponge_url = sponge_utils.upload_results(job, log=tko_utils.dprint)
        if sponge_url:
            job.keyval_dict['sponge_url'] = sponge_url

    # check for failures
    message_lines = [""]
    job_successful = True
    for test in job.tests:
        if not test.subdir:
            continue
        tko_utils.dprint("* testname, status, reason: %s %s %s"
                         % (test.subdir, test.status, test.reason))
        if test.status != 'GOOD':
            job_successful = False
            message_lines.append(format_failure_message(
                jobname, test.kernel.base, test.subdir,
                test.status, test.reason))

    # Only set when the job is actually inserted (i.e. not in dry-run mode);
    # the suite report section below checks for None.
    job_data = None
    try:
        message = "\n".join(message_lines)

        if not dry_run:
            # send out a email report of failure
            if len(message) > 2 and mail_on_failure:
                tko_utils.dprint("Sending email report of failure on %s to %s"
                                 % (jobname, job.user))
                mailfailure(jobname, job, message)

            # write the job into the database.
            job_data = db.insert_job(
                jobname, job,
                parent_job_id=job_keyval.get(constants.PARENT_JOB_ID, None))

            # Upload perf values to the perf dashboard, if applicable.
            for test in job.tests:
                perf_uploader.upload_test(job, test, jobname)

            # Although the cursor has autocommit, we still need to force it to
            # commit existing changes before we can use django models,
            # otherwise it will go into deadlock when django models try to
            # start a new transaction while the current one has not finished
            # yet.
            db.commit()

            # Handle retry job.
            orig_afe_job_id = job_keyval.get(constants.RETRY_ORIGINAL_JOB_ID,
                                             None)
            if orig_afe_job_id:
                orig_job_idx = tko_models.Job.objects.get(
                        afe_job_id=orig_afe_job_id).job_idx
                _invalidate_original_tests(orig_job_idx, job.index)
    except Exception as e:
        metadata = {'path': path, 'error': str(e),
                    'details': traceback.format_exc()}
        tko_utils.dprint("Hit exception while uploading to tko db:\n%s" %
                         traceback.format_exc())
        autotest_es.post(use_http=True, type_str='parse_failure',
                         metadata=metadata)
        # Bare raise keeps the original traceback; `raise e` would restart it
        # from this line under Python 2.
        raise

    # Serializing job into a binary file
    try:
        # tko_pb2 is imported only to detect that the generated protobuf
        # module exists.
        from autotest_lib.tko import tko_pb2
        from autotest_lib.tko import job_serializer

        serializer = job_serializer.JobSerializer()
        binary_file_name = os.path.join(path, "job.serialize")
        serializer.serialize_to_binary(job, jobname, binary_file_name)

        if reparse:
            site_export_file = "autotest_lib.tko.site_export"
            site_export = utils.import_site_function(__file__,
                                                     site_export_file,
                                                     "site_export",
                                                     _site_export_dummy)
            site_export(binary_file_name)

    except ImportError:
        tko_utils.dprint("DEBUG: tko_pb2.py doesn't exist. Create by "
                         "compiling tko/tko.proto.")

    if not dry_run:
        db.commit()

    # Generate a suite report.
    # Check whether this is a suite job, a suite job will be a hostless job, its
    # jobname will be <JOB_ID>-<USERNAME>/hostless, the suite field will not be
    # NULL. Only generate timeline report when datastore_parent_key is given.
    # Failures here are best-effort: they are printed as a warning only.
    try:
        datastore_parent_key = job_keyval.get('datastore_parent_key', None)
        if (suite_report and jobname.endswith('/hostless')
                and job_data is not None and job_data['suite']
                and datastore_parent_key):
            tko_utils.dprint('Start dumping suite timing report...')
            timing_log = os.path.join(path, 'suite_timing.log')
            dump_cmd = ("%s/site_utils/dump_suite_report.py %s "
                        "--output='%s' --debug" %
                        (common.autotest_dir, job_data['afe_job_id'],
                         timing_log))
            subprocess.check_output(dump_cmd, shell=True)
            tko_utils.dprint('Successfully finish dumping suite timing report')

            if (datastore_creds and export_to_gcloud_path
                    and os.path.exists(export_to_gcloud_path)):
                upload_cmd = [export_to_gcloud_path, datastore_creds,
                              timing_log, '--parent_key',
                              repr(tuple(datastore_parent_key))]
                tko_utils.dprint('Start exporting timeline report to gcloud')
                subprocess.check_output(upload_cmd)
                tko_utils.dprint('Successfully export timeline report to '
                                 'gcloud')
            else:
                tko_utils.dprint('DEBUG: skip exporting suite timeline to '
                                 'gcloud, because either gcloud creds or '
                                 'export_to_gcloud script is not found.')
    except Exception as e:
        tko_utils.dprint("WARNING: fail to dump/export suite report. "
                         "Error:\n%s" % e)

    # Mark GS_OFFLOADER_NO_OFFLOAD in gs_offloader_instructions at the end of
    # the function, so any failure, e.g., db connection error, will stop
    # gs_offloader_instructions being updated, and logs can be uploaded for
    # troubleshooting.
    if job_successful:
        # Check if we should not offload this test's results.
        if job_keyval.get(constants.JOB_OFFLOAD_FAILURES_KEY, False):
            # Update the gs_offloader_instructions json file.
            gs_instructions_file = os.path.join(
                    path, constants.GS_OFFLOADER_INSTRUCTIONS)
            gs_offloader_instructions = {}
            if os.path.exists(gs_instructions_file):
                with open(gs_instructions_file, 'r') as f:
                    gs_offloader_instructions = json.load(f)

            gs_offloader_instructions[constants.GS_OFFLOADER_NO_OFFLOAD] = True
            with open(gs_instructions_file, 'w') as f:
                json.dump(gs_offloader_instructions, f)
def setup_test(self, name, job_id, server_package_url, result_path,
               control=None, skip_cleanup=False, job_folder=None,
               dut_name=None):
    """Setup test container for the test job to run.

    The setup includes:
    1. Install autotest_server package from given url.
    2. Copy over local shadow_config.ini.
    3. Mount local site-packages.
    4. Mount test result directory.

    TODO(dshi): Setup also needs to include test control file for autoserv
                to run in container.

    @param name: Name of the container.
    @param job_id: Job id for the test job to run in the test container.
    @param server_package_url: Url to download autotest_server package.
    @param result_path: Directory to be mounted to container to store test
                        results.
    @param control: Path to the control file to run the test job. Default is
                    set to None.
    @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot
                         container failures. Not read in this body.
                         NOTE(review): presumably consumed by the
                         func_cleanup_if_fail wrapper - confirm.
    @param job_folder: Folder name of the job, e.g., 123-debug_user.
    @param dut_name: Name of the dut to run test, used as the hostname of the
                     container. Default is None.

    @return: A Container object for the test container.

    @raise ContainerError: If the result directory does not exist, or if
                           container does not exist, or not running.
    """
    start_time = time.time()

    if not os.path.exists(result_path):
        # Interpolate the path here: exceptions do not apply logging-style
        # format arguments.
        raise error.ContainerError('Result directory does not exist: %s' %
                                   result_path)
    result_path = os.path.abspath(result_path)

    # Save control file to result_path temporarily. The reason is that the
    # control file in drone_tmp folder can be deleted during scheduler
    # restart. For test not using SSP, the window between test starts and
    # control file being picked up by the test is very small (< 2 seconds).
    # However, for tests using SSP, it takes around 1 minute before the
    # container is setup. If scheduler is restarted during that period, the
    # control file will be deleted, and the test will fail.
    if control:
        control_file_name = os.path.basename(control)
        safe_control = os.path.join(result_path, control_file_name)
        utils.run('cp %s %s' % (control, safe_control))

    # Create test container from the base container.
    container = self.create_from_base(name)

    # Update the hostname of the test container to be `dut_name`.
    # Some TradeFed tests use hostname in test results, which is used to
    # group test results in dashboard. The default container name is set to
    # be the name of the folder, which is unique (as it is composed of job
    # id and timestamp. For better result view, the container's hostname is
    # set to be a string containing the dut hostname.
    if dut_name:
        config_file = os.path.join(container.container_path, name, 'config')
        lxc_utsname_setting = (
                'lxc.utsname = ' +
                CONTAINER_UTSNAME_FORMAT % dut_name.replace('.', '_'))
        utils.run(APPEND_CMD_FMT % {'content': lxc_utsname_setting,
                                    'file': config_file})

    # Deploy server side package
    usr_local_path = os.path.join(container.rootfs, 'usr', 'local')
    autotest_pkg_path = os.path.join(usr_local_path,
                                     'autotest_server_package.tar.bz2')
    autotest_path = os.path.join(usr_local_path, 'autotest')
    # sudo is required so os.makedirs may not work.
    utils.run('sudo mkdir -p %s' % usr_local_path)

    download_extract(server_package_url, autotest_pkg_path, usr_local_path)
    deploy_config_manager = lxc_config.DeployConfigManager(container)
    deploy_config_manager.deploy_pre_start()

    # Copy over control file to run the test job.
    if control:
        container_drone_temp = os.path.join(autotest_path, 'drone_tmp')
        utils.run('sudo mkdir -p %s' % container_drone_temp)
        container_control_file = os.path.join(
                container_drone_temp, control_file_name)
        # Move the control file stored in the result folder to container.
        utils.run('sudo mv %s %s' % (safe_control, container_control_file))

    if IS_MOBLAB:
        site_packages_path = MOBLAB_SITE_PACKAGES
        site_packages_container_path = MOBLAB_SITE_PACKAGES_CONTAINER[1:]
    else:
        site_packages_path = os.path.join(common.autotest_dir,
                                          'site-packages')
        site_packages_container_path = os.path.join(
                lxc_config.CONTAINER_AUTOTEST_DIR, 'site-packages')
    # Each entry is (source, destination, readonly).
    mount_entries = [
        (site_packages_path, site_packages_container_path, True),
        (os.path.join(common.autotest_dir, 'puppylab'),
         os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR, 'puppylab'),
         True),
        (result_path, RESULT_DIR_FMT % job_folder, False),
    ]
    for mount_config in deploy_config_manager.mount_configs:
        mount_entries.append((mount_config.source, mount_config.target,
                              mount_config.readonly))
    # Update container config to mount directories.
    for source, destination, readonly in mount_entries:
        container.mount_dir(source, destination, readonly)

    # Update file permissions.
    # TODO(dshi): crbug.com/459344 Skip following action when test container
    # can be unprivileged container.
    utils.run('sudo chown -R root "%s"' % autotest_path)
    utils.run('sudo chgrp -R root "%s"' % autotest_path)

    container.start(name)
    deploy_config_manager.deploy_post_start()

    container.modify_import_order()

    container.verify_autotest_setup(job_folder)

    autotest_es.post(use_http=True,
                     type_str=CONTAINER_CREATE_METADB_TYPE,
                     metadata={'drone': socket.gethostname(),
                               'job_id': job_id,
                               'time_used': time.time() - start_time,
                               'success': True})

    logging.debug('Test container %s is set up.', name)
    return container
def setup_test(self, name, job_id, server_package_url, result_path,
               control=None, skip_cleanup=False):
    """Setup test container for the test job to run.

    The setup includes:
    1. Install autotest_server package from given url.
    2. Copy over local shadow_config.ini.
    3. Mount local site-packages.
    4. Mount test result directory.

    TODO(dshi): Setup also needs to include test control file for autoserv
                to run in container.

    @param name: Name of the container.
    @param job_id: Job id for the test job to run in the test container.
    @param server_package_url: Url to download autotest_server package.
    @param result_path: Directory to be mounted to container to store test
                        results.
    @param control: Path to the control file to run the test job. Default is
                    set to None.
    @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot
                         container failures. Not read in this body.
                         NOTE(review): presumably consumed by the
                         func_cleanup_if_fail wrapper - confirm.

    @return: A Container object for the test container.

    @raise ContainerError: If the result directory does not exist, or if
                           container does not exist, or not running.
    """
    start_time = time.time()

    if not os.path.exists(result_path):
        # Interpolate the path here: exceptions do not apply logging-style
        # format arguments.
        raise error.ContainerError('Result directory does not exist: %s' %
                                   result_path)
    result_path = os.path.abspath(result_path)

    # Create test container from the base container.
    container = self.create_from_base(name)

    # Deploy server side package
    usr_local_path = os.path.join(container.rootfs, 'usr', 'local')
    autotest_pkg_path = os.path.join(usr_local_path,
                                     'autotest_server_package.tar.bz2')
    autotest_path = os.path.join(usr_local_path, 'autotest')
    # sudo is required so os.makedirs may not work.
    utils.run('sudo mkdir -p %s' % usr_local_path)

    download_extract(server_package_url, autotest_pkg_path, usr_local_path)
    deploy_config_manager = lxc_config.DeployConfigManager(container)
    deploy_config_manager.deploy_pre_start()

    # Copy over control file to run the test job.
    if control:
        container_drone_temp = os.path.join(autotest_path, 'drone_tmp')
        utils.run('sudo mkdir -p %s' % container_drone_temp)
        container_control_file = os.path.join(
                container_drone_temp, os.path.basename(control))
        utils.run('sudo cp %s %s' % (control, container_control_file))

    if IS_MOBLAB:
        site_packages_path = MOBLAB_SITE_PACKAGES
        site_packages_container_path = MOBLAB_SITE_PACKAGES_CONTAINER[1:]
    else:
        site_packages_path = os.path.join(common.autotest_dir,
                                          'site-packages')
        site_packages_container_path = os.path.join(
                lxc_config.CONTAINER_AUTOTEST_DIR, 'site-packages')
    # Each entry is (source, destination, readonly).
    mount_entries = [
        (site_packages_path, site_packages_container_path, True),
        (os.path.join(common.autotest_dir, 'puppylab'),
         os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR, 'puppylab'),
         True),
        (result_path, RESULT_DIR_FMT % job_id, False),
    ]
    # Update container config to mount directories.
    for source, destination, readonly in mount_entries:
        container.mount_dir(source, destination, readonly)

    # Update file permissions.
    # TODO(dshi): crbug.com/459344 Skip following action when test container
    # can be unprivileged container.
    utils.run('sudo chown -R root "%s"' % autotest_path)
    utils.run('sudo chgrp -R root "%s"' % autotest_path)

    container.start(name)
    deploy_config_manager.deploy_post_start()

    container.modify_import_order()

    container.verify_autotest_setup(job_id)

    autotest_es.post(use_http=True,
                     type_str=CONTAINER_CREATE_METADB_TYPE,
                     metadata={'drone': socket.gethostname(),
                               'job_id': job_id,
                               'time_used': time.time() - start_time,
                               'success': True})

    logging.debug('Test container %s is set up.', name)
    return container