def PerformStage(self):
  if (not (self._run.options.buildbot or self._run.options.remote_trybot)
      and self._run.options.clobber):
    if not commands.ValidateClobber(self._build_root):
      cros_build_lib.Die('--clobber in local mode must be approved.')

  # If we can't get a manifest out of it, then it's not usable and must be
  # clobbered.
  manifest = None
  delete_chroot = False
  if not self._run.options.clobber:
    try:
      manifest = git.ManifestCheckout.Cached(self._build_root, search=False)
    except (KeyboardInterrupt, MemoryError, SystemExit):
      raise
    except Exception as e:
      # Either there is no repo there, or the manifest isn't usable.  If the
      # directory exists, log the exception for debugging reasons.  Either
      # way, the checkout needs to be wiped since it's in an unknown state.
      if os.path.exists(self._build_root):
        logging.warning('ManifestCheckout at %s is unusable: %s',
                        self._build_root, e)
      delete_chroot = True

  # Clean mount points first to be safe about deleting.
  chroot_path = os.path.join(self._build_root, constants.DEFAULT_CHROOT_DIR)
  cros_sdk_lib.CleanupChrootMount(chroot=chroot_path)
  osutils.UmountTree(self._build_root)

  if not delete_chroot:
    delete_chroot = not self.CanReuseChroot(chroot_path)

  # If we're going to delete the chroot and we can use a snapshot instead,
  # try to revert.  If the revert succeeds, we don't need to delete after all.
  if delete_chroot and self.CanUseChrootSnapshotToDelete(chroot_path):
    delete_chroot = not self._RevertChrootToCleanSnapshot()

  # Re-mount the chroot image if it exists so that subsequent steps can clean
  # up inside.
  if not delete_chroot and self._run.config.chroot_use_image:
    try:
      cros_sdk_lib.MountChroot(chroot=chroot_path, create=False)
    except cros_build_lib.RunCommandError as e:
      logging.error('Unable to mount chroot under %s.  Deleting chroot.  '
                    'Error: %s', self._build_root, e)
      delete_chroot = True

  if manifest is None:
    self._DeleteChroot()
    repository.ClearBuildRoot(self._build_root,
                              self._run.options.preserve_paths)
  else:
    tasks = [self._BuildRootGitCleanup,
             self._WipeOldOutput,
             self._DeleteArchivedTrybotImages,
             self._DeleteArchivedPerfResults,
             self._DeleteAutotestSitePackages]
    if self._run.options.chrome_root:
      tasks.append(self._DeleteChromeBuildOutput)
    if delete_chroot:
      tasks.append(self._DeleteChroot)
    else:
      tasks.append(self._CleanChroot)

    # Only enable CancelObsoleteSlaveBuilds on master builds that use the
    # Buildbucket scheduler; it checks for builds in the ChromiumOS and
    # ChromeOS waterfalls.
    if (config_lib.UseBuildbucketScheduler(self._run.config) and
        config_lib.IsMasterBuild(self._run.config)):
      tasks.append(self.CancelObsoleteSlaveBuilds)

    parallel.RunParallelSteps(tasks)

  # If chroot.img still exists after everything is cleaned up, it means we're
  # planning to reuse it.  This chroot was created by the previous run, so its
  # creation isn't affected by any potential changes in the current run.
  # Therefore, if this run fails, having the subsequent run revert to this
  # snapshot will still produce a clean chroot.  If this run succeeds, the
  # next run will reuse the chroot without needing to revert it.  Thus, taking
  # a snapshot now should be correct regardless of whether this run will
  # ultimately succeed or not.
  if os.path.exists(chroot_path + '.img'):
    self._CreateCleanSnapshot()
def testParallelMany(self):
  """Same as testParallel, but with many more processes for stressing."""
  parallel.RunParallelSteps([self.testParallel] * 40)
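# A minimal, self-contained sketch of the RunParallelSteps pattern exercised
# by the snippets in this section.  Illustrative only: the step functions are
# hypothetical, the import path is assumed to be chromite.lib.parallel, and
# the keyword arguments (return_values, max_parallel, halt_on_error) are
# assumed to behave as the call sites here suggest, with return values
# assumed to come back in the same order as the input steps.
from chromite.lib import parallel


def _StepOne():
  """A trivial step; each step runs in its own child process."""
  return 'one'


def _StepTwo():
  """A second trivial step run alongside the first."""
  return 'two'


def _RunSketch():
  # Fire-and-forget form: just run the callables in parallel.
  parallel.RunParallelSteps([_StepOne, _StepTwo])

  # Collecting results: return_values=True gathers each step's return value
  # (assumed here to match the order of the input list).
  return parallel.RunParallelSteps([_StepOne, _StepTwo], return_values=True)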
def PerformStage(self): """Invoke payload generation. If testing is enabled, schedule tests. This method is intended to be safe to invoke inside a process. """ # Convert to release tools naming for channels. if not self.channel.endswith('-channel'): self.channel += '-channel' with osutils.TempDir(sudo_rm=True) as tempdir: # Create the definition of the build to generate payloads for. build = gspaths.Build(channel=self.channel, board=self.board, version=self.version, bucket=gspaths.ChromeosReleases.BUCKET) payload_build = gspaths.Build(build) if self.debug: payload_build.bucket = gspaths.ChromeosReleases.TEST_BUCKET try: # Generate the payloads. self._PrintLoudly('Starting %s, %s, %s' % (self.channel, self.version, self.board)) paygen = paygen_build_lib.PaygenBuild( build, payload_build, work_dir=tempdir, site_config=self._run.site_config, dry_run=self.debug, skip_delta_payloads=self.skip_delta_payloads, skip_duts_check=self.skip_duts_check) testdata = paygen.CreatePayloads() # Now, schedule the payload tests if desired. if not self.skip_testing: (suite_name, archive_board, archive_build, payload_test_configs) = testdata # For unified builds, only test against the specified models. if self._run.config.models: models = [] for model in self._run.config.models: # 'au' is a test suite generated in ge_build_config.json if model.test_suites and 'au' in model.test_suites: models.append(model) if len(models) > 1: fsi_configs = set( p for p in payload_test_configs if p.payload_type == paygen_build_lib.PAYLOAD_TYPE_FSI) non_fsi_configs = set(p for p in payload_test_configs if p not in fsi_configs) stages = self._ScheduleForApplicableModels( archive_board, archive_build, fsi_configs, suite_name) stages += self._ScheduleForModels( archive_board, archive_build, models, non_fsi_configs, suite_name) steps = [stage.Run for stage in stages] parallel.RunParallelSteps(steps) elif len(models) == 1: model = models[0] PaygenTestStage( self._run, self.buildstore, suite_name, archive_board, model.name, model.lab_board_name, self.channel, archive_build, self.skip_duts_check, self.debug, payload_test_configs, config_lib.GetHWTestEnv( self._run.config, model_config=model)).Run() else: lab_board_name = config_lib.GetNonUniBuildLabBoardName( archive_board) PaygenTestStage( self._run, self.buildstore, suite_name, archive_board, None, lab_board_name, self.channel, archive_build, self.skip_duts_check, self.debug, payload_test_configs, config_lib.GetHWTestEnv(self._run.config)).Run() except (paygen_build_lib.BuildLocked) as e: # These errors are normal if it's possible that another builder is # processing the same build. (perhaps by a trybot generating payloads on # request). logging.info('PaygenBuild for %s skipped because: %s', self.channel, e)
def testRunParallelSteps(self):
  """Make sure RunParallelSteps is mocked out."""
  with ParallelMock():
    parallel.RunParallelSteps([self._Callback])
    self.assertEqual(1, self._calls)
def testFailedPickleOnReturn(self):
  """PicklingError should be thrown when a return value fails to pickle."""
  with self.assertRaises(parallel.BackgroundFailure):
    parallel.RunParallelSteps([self._BadPickler], return_values=True)
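# The _BadPickler helper referenced above isn't shown in this section.  As a
# hypothetical sketch, any step whose return value the pickle module cannot
# serialize (for example, a lambda or a nested function) would exercise the
# same failure path when return_values=True requires the child process to
# ship the value back to the parent.
def _BadPicklerSketch():
  """Hypothetical step returning a value that pickle cannot serialize."""
  return lambda: None  # Lambdas are not picklable, so the transfer fails.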
def _ParallelPrinter(self):
  parallel.RunParallelSteps([self._FastPrinter] * _NUM_THREADS)
def _NestedParallelPrinter(self):
  parallel.RunParallelSteps([self._ParallelPrinter])
def GenerateAlertsSummary(db, builds=None, logdog_client=None,
                          milo_client=None, allow_experimental=False):
  """Generates the full set of alerts to send to Sheriff-o-Matic.

  Args:
    db: cidb.CIDBConnection object.
    builds: A list of (waterfall, builder_name, severity) tuples to summarize.
      Defaults to SOM_BUILDS[SOM_TREE].
    logdog_client: logdog.LogdogClient object.
    milo_client: milo.MiloClient object.
    allow_experimental: Boolean if non-important builds should be included.

  Returns:
    JSON-marshalled AlertsSummary object.
  """
  if not builds:
    builds = constants.SOM_BUILDS[constants.SOM_TREE]
  if not logdog_client:
    logdog_client = logdog.LogdogClient()
  if not milo_client:
    milo_client = milo.MiloClient()

  funcs = []
  now = datetime.datetime.utcnow()

  # Iterate over relevant masters.
  # build_tuple is either: waterfall, build_config, severity
  # or: build_id, severity
  for build_tuple in builds:
    # Find the specified build.
    if len(build_tuple) == 2:
      # pylint: disable=unbalanced-tuple-unpacking
      build_id, severity = build_tuple
      # pylint: enable=unbalanced-tuple-unpacking
      master = db.GetBuildStatus(build_id)
      if master is None:
        logging.warning('Could not locate build id %s', build_id)
        continue
      wfall = master['waterfall']
      build_config = master['build_config']
    elif len(build_tuple) == 3:
      wfall, build_config, severity = build_tuple
      master = db.GetMostRecentBuild(wfall, build_config)
      if master is None:
        logging.warning('Could not locate build %s %s', wfall, build_config)
        continue
    else:
      logging.error('Invalid build tuple: %s', str(build_tuple))
      continue

    statuses = [master]
    stages = db.GetBuildStages(master['id'])
    exceptions = db.GetBuildsFailures([master['id']])
    messages = db.GetBuildMessages(master['id'])
    annotations = []
    logging.info('%s %s (id %d): single/master build, %d stages, %d messages',
                 wfall, build_config, master['id'], len(stages),
                 len(messages))

    # Find any slave builds, and the individual slave stages.
    slave_statuses = db.GetSlaveStatuses(master['id'])
    if slave_statuses:
      statuses.extend(slave_statuses)
      slave_stages = db.GetSlaveStages(master['id'])
      stages.extend(slave_stages)
      exceptions.extend(db.GetSlaveFailures(master['id']))
      annotations.extend(
          db.GetAnnotationsForBuilds([master['id']]).get(master['id'], []))
      logging.info('- %d slaves, %d slave stages, %d annotations',
                   len(slave_statuses), len(slave_stages), len(annotations))

    # Look for failing and inflight (signifying timeouts) slave builds.
    for build in sorted(statuses, key=lambda s: s['builder_name']):
      funcs.append(
          lambda build_=build, stages_=stages, exceptions_=exceptions,
          messages_=messages, annotations_=annotations, siblings_=statuses,
          severity_=severity: GenerateBuildAlert(
              build_, stages_, exceptions_, messages_, annotations_,
              siblings_, severity_, now, db, logdog_client, milo_client,
              allow_experimental=allow_experimental))

  alerts = [
      alert
      for alert in parallel.RunParallelSteps(funcs, return_values=True)
      if alert
  ]

  revision_summaries = {}
  summary = som.AlertsSummary(alerts, revision_summaries, ToEpoch(now))
  return json.dumps(summary, cls=ObjectEncoder)
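# The lambdas appended to `funcs` above bind the loop variables as default
# arguments (build_=build, stages_=stages, ...).  This is the standard Python
# idiom for snapshotting per-iteration values: a late-binding closure would
# see only the final iteration's values by the time RunParallelSteps invokes
# it.  A small stand-alone illustration with hypothetical names:
def _CaptureExample():
  late_bound = [lambda: i for i in range(3)]
  snapshotted = [lambda i_=i: i_ for i in range(3)]
  assert [f() for f in late_bound] == [2, 2, 2]
  assert [f() for f in snapshotted] == [0, 1, 2]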
def testMultipleHelloWorlds(self):
  """Test that multiple threads can be created."""
  parallel.RunParallelSteps([self.testParallelHelloWorld] * 2)
def ArchiveReleaseArtifacts():
  with self.ArtifactUploader(self._release_upload_queue, archive=False):
    steps = [BuildAndArchiveAllImages, ArchiveFirmwareImages]
    parallel.RunParallelSteps(steps)
  PushImage()
def GeneratePayloads(self):
  """Iterates through payload requirements and generates them.

  This is the main method of this class.  It iterates through payloads
  it needs, generates them, and builds a Cache that can be used by the
  test harness to reference these payloads.

  Returns:
    The cache as a Python dict.
  """

  def GeneratePayload(payload, log_file):
    """Returns the error code from generating an update with the devserver."""
    # Base command.
    command = ['start_devserver', '--pregenerate_update', '--exit']

    in_chroot_key = in_chroot_base = None
    in_chroot_target = path_util.ToChrootPath(payload.target)
    if payload.base:
      in_chroot_base = path_util.ToChrootPath(payload.base)

    if payload.key:
      in_chroot_key = path_util.ToChrootPath(payload.key)

    command.append('--image=%s' % in_chroot_target)
    if payload.base:
      command.append('--src_image=%s' % in_chroot_base)
    if payload.key:
      command.append('--private_key=%s' % in_chroot_key)

    if payload.base:
      debug_message = 'delta payload from %s to %s' % (payload.base,
                                                       payload.target)
    else:
      debug_message = 'full payload to %s' % payload.target

    if payload.for_vm:
      debug_message += ' and not patching the kernel.'

    if in_chroot_key:
      debug_message = 'Generating a signed %s' % debug_message
    else:
      debug_message = 'Generating an unsigned %s' % debug_message

    logging.info(debug_message)
    try:
      with timeout_util.Timeout(constants.MAX_TIMEOUT_SECONDS):
        cros_build_lib.SudoRunCommand(command,
                                      log_stdout_to_file=log_file,
                                      combine_stdout_stderr=True,
                                      enter_chroot=True,
                                      print_cmd=False,
                                      cwd=constants.SOURCE_ROOT)
    except (timeout_util.TimeoutError, cros_build_lib.RunCommandError):
      # Print output first, then re-raise the exception.
      if os.path.isfile(log_file):
        logging.error(osutils.ReadFile(log_file))
      raise

  def ProcessOutput(log_files):
    """Processes results from the log files of GeneratePayload invocations.

    Args:
      log_files: A list of filename strings with stored logs.

    Returns:
      An array of cache entries from the log files.

    Raises:
      payload_generation_exception.PayloadGenerationException: Raises this
        exception if we failed to parse the devserver output to find the
        location of the update path.
    """
    # Looking for this line in the output.
    key_line_re = re.compile(r'^PREGENERATED_UPDATE=([\w/./+]+)')
    return_array = []
    for log_file in log_files:
      with open(log_file) as f:
        for line in f:
          match = key_line_re.search(line)
          if match:
            # Convert cache/label/update.gz -> update/cache/label.
            path_to_update_gz = match.group(1).rstrip()
            path_to_update_dir = path_to_update_gz.rpartition(
                '/update.gz')[0]

            # Check that we could actually parse the directory correctly.
            if not path_to_update_dir:
              raise payload_generation_exception.PayloadGenerationException(
                  'Payload generated but failed to parse cache directory.')

            return_array.append('/'.join(['update', path_to_update_dir]))
            break
        else:
          logging.error('Could not find PREGENERATED_UPDATE in log:')
          f.seek(0)
          for line in f:
            logging.error('  log: %s', line)

          # This is not a recoverable error.
          raise InvalidDevserverOutput('Could not parse devserver log')

    return return_array

  jobs = []
  log_files = []
  # Generate the list of payloads and the list of log files.
  for payload in self.payloads:
    fd, log_file = tempfile.mkstemp('GenerateVMUpdate')
    os.close(fd)  # Just want the filename, so close the file immediately.

    jobs.append(functools.partial(GeneratePayload, payload, log_file))
    log_files.append(log_file)

  # Run update generation code and wait for output.
  logging.info('Generating updates required for this test suite in parallel.')
  try:
    parallel.RunParallelSteps(jobs, max_parallel=self.jobs)
  except parallel.BackgroundFailure as ex:
    logging.error(ex)
    raise payload_generation_exception.PayloadGenerationException(
        'Failed to generate a required update.')

  results = ProcessOutput(log_files)

  # Build the dictionary from our ids and returned cache paths.
  cache_dictionary = {}
  for index, payload in enumerate(self.payloads):
    # The returned path is of the form update/cache/directory.
    update_path = results[index]
    cache_dictionary[payload.UpdateId()] = update_path
    # Archive payload to the payload directory.
    if payload.archive and self.nplus1_archive_dir:
      # Only need the directory, as we know the rest.
      path_to_payload_dir = os.path.join(
          constants.SOURCE_ROOT, 'chroot',
          self.CHROOT_PATH_TO_DEVSERVER_CACHE,
          os.path.basename(update_path))
      payload_path = os.path.join(path_to_payload_dir, 'update.gz')
      archive_path = os.path.join(self.nplus1_archive_dir,
                                  payload.GetNameForBin())
      logging.info('Archiving %s to %s.', payload.GetNameForBin(),
                   archive_path)
      shutil.copyfile(payload_path, archive_path)

      if payload.archive_stateful:
        stateful_path = os.path.join(path_to_payload_dir, 'stateful.tgz')
        archive_path = os.path.join(self.nplus1_archive_dir, 'stateful.tgz')
        logging.info('Archiving stateful payload from %s to %s',
                     payload.GetNameForBin(), archive_path)
        shutil.copyfile(stateful_path, archive_path)

  return cache_dictionary
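# GeneratePayloads builds its job list with functools.partial rather than
# lambdas, which achieves the same per-iteration argument binding as the
# default-argument idiom shown earlier while keeping each job an ordinary
# callable.  A minimal sketch with a hypothetical worker:
import functools


def _WorkerSketch(name, log_file):
  """Hypothetical worker that receives the pre-bound arguments."""
  return '%s -> %s' % (name, log_file)


def _BuildJobsSketch(items):
  # Each partial freezes this iteration's arguments at creation time.
  return [functools.partial(_WorkerSketch, item, '/tmp/%s.log' % item)
          for item in items]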
def _RunBackgroundStagesForBoard(self, builder_run, board):
  """Run background board-specific stages for the specified board.

  Used by _RunBackgroundStagesForBoardAndMarkAsSuccessful.  Callers should
  use that method instead.

  Args:
    builder_run: BuilderRun object for these background stages.
    board: Board name.
  """
  config = builder_run.config

  # TODO(mtennant): This is the last usage of self.archive_stages.  We can
  # kill it once we migrate its uses to BuilderRun so that none of the
  # stages below need it as an argument.
  archive_stage = self.archive_stages[BoardConfig(board, config.name)]
  if config.afdo_generate_min:
    self._RunParallelStages([archive_stage])
    return

  # paygen can't complete without push_image.
  assert not config.paygen or config.push_image

  if config.build_packages_in_background:
    self._RunStage(build_stages.BuildPackagesStage, board,
                   update_metadata=True, builder_run=builder_run,
                   afdo_use=config.afdo_use)

  # While this stage list is run in parallel, the order here dictates the
  # order that things will be shown in the log.  So group things together
  # that make sense when read in order.  Also keep in mind that, since we
  # gather output manually, early slow stages will prevent any output from
  # later stages showing up until they finish.
  changes = self._GetChangesUnderTest()
  stage_list = []
  if changes:
    stage_list += [[report_stages.DetectIrrelevantChangesStage, board,
                    changes]]
  stage_list += [[test_stages.UnitTestStage, board]]

  # Skip most steps if we're a compilecheck builder.
  if builder_run.config.compilecheck or builder_run.options.compilecheck:
    for x in stage_list:
      self._RunStage(*x, builder_run=builder_run)
    return

  stage_list += [[chrome_stages.SimpleChromeWorkflowStage, board]]

  if config.vm_test_runs > 1:
    # Run the VMTests multiple times to see if they fail.
    stage_list += [[generic_stages.RepeatStage, config.vm_test_runs,
                    test_stages.VMTestStage, board]]
  else:
    # Give the VMTests one retry attempt in case failures are flaky.
    stage_list += [[generic_stages.RetryStage, 1, test_stages.VMTestStage,
                    board]]

  if config.run_gce_tests:
    # Give the GCETests one retry attempt in case failures are flaky.
    stage_list += [[generic_stages.RetryStage, 1, test_stages.GCETestStage,
                    board]]

  if config.afdo_generate:
    stage_list += [[afdo_stages.AFDODataGenerateStage, board]]

  stage_list += [
      [release_stages.SignerTestStage, board, archive_stage],
      [release_stages.SigningStage, board],
      [release_stages.PaygenStage, board],
      [test_stages.ImageTestStage, board],
      [artifact_stages.UploadPrebuiltsStage, board],
      [artifact_stages.DevInstallerPrebuiltsStage, board],
      [artifact_stages.DebugSymbolsStage, board],
      [artifact_stages.CPEExportStage, board],
      [artifact_stages.UploadTestArtifactsStage, board],
  ]

  stage_objs = [self._GetStageInstance(*x, builder_run=builder_run)
                for x in stage_list]

  # Build the image first before running the steps.
  with self._build_image_lock:
    self._RunStage(build_stages.BuildImageStage, board,
                   builder_run=builder_run, afdo_use=config.afdo_use)

  parallel.RunParallelSteps([
      lambda: self._RunParallelStages(stage_objs + [archive_stage]),
      lambda: self._RunHWTests(builder_run, board),
  ])
def _RunBackgroundStagesForBoard(self, builder_run, board):
  """Run background board-specific stages for the specified board.

  Used by _RunBackgroundStagesForBoardAndMarkAsSuccessful.  Callers should
  use that method instead.

  Args:
    builder_run: BuilderRun object for these background stages.
    board: Board name.
  """
  config = builder_run.config

  # TODO(mtennant): This is the last usage of self.archive_stages.  We can
  # kill it once we migrate its uses to BuilderRun so that none of the
  # stages below need it as an argument.
  archive_stage = self.archive_stages[BoardConfig(board, config.name)]
  if config.afdo_generate_min:
    self._RunParallelStages([archive_stage])
    return

  # paygen can't complete without push_image.
  assert not config.paygen or config.push_image

  # While this stage list is run in parallel, the order here dictates the
  # order that things will be shown in the log.  So group things together
  # that make sense when read in order.  Also keep in mind that, since we
  # gather output manually, early slow stages will prevent any output from
  # later stages showing up until they finish.
  early_stage_list = [
      [test_stages.UnitTestStage, board],
  ]
  stage_list = [
      [test_stages.DebugInfoTestStage, board],
  ]

  # Skip most steps if we're a compilecheck builder.
  if builder_run.config.compilecheck or builder_run.options.compilecheck:
    board_runattrs = builder_run.GetBoardRunAttrs(board)
    board_runattrs.SetParallel('test_artifacts_uploaded', False)
    stages = early_stage_list + stage_list
    for x in stages:
      self._RunStage(*x, builder_run=builder_run)
    return

  stage_list += [[chrome_stages.SimpleChromeArtifactsStage, board]]

  if config.gce_tests:
    stage_list += [[
        generic_stages.RetryStage, constants.VM_NUM_RETRIES,
        vm_test_stages.GCETestStage, board
    ]]

  if config.moblab_vm_tests:
    stage_list += [[vm_test_stages.MoblabVMTestStage, board]]

  if config.afdo_generate:
    stage_list += [[afdo_stages.AFDODataGenerateStage, board]]

  if config.afdo_generate_async:
    stage_list += [[afdo_stages.GenerateBenchmarkAFDOStage, board]]

  if config.orderfile_generate:
    stage_list += [[afdo_stages.GenerateChromeOrderfileStage, board]]

  if config.orderfile_verify:
    stage_list += [[afdo_stages.UploadVettedOrderfileStage, board]]

  if config.kernel_afdo_verify:
    stage_list += [[afdo_stages.UploadVettedKernelAFDOStage, board]]

  if config.chrome_afdo_verify:
    stage_list += [[afdo_stages.UploadVettedChromeAFDOStage, board]]

  stage_list += [
      [release_stages.SignerTestStage, board, archive_stage],
      [release_stages.SigningStage, board],
      [release_stages.PaygenStage, board],
      [test_stages.ImageTestStage, board],
      [artifact_stages.UploadPrebuiltsStage, board],
      [artifact_stages.DevInstallerPrebuiltsStage, board],
  ]

  if config.run_cpeexport:
    stage_list += [[artifact_stages.CPEExportStage, board]]

  if config.run_build_configs_export:
    stage_list += [[artifact_stages.BuildConfigsExportStage, board]]

  early_stage_list += [[artifact_stages.UploadTestArtifactsStage, board]]

  early_stage_objs = [
      self._GetStageInstance(*x, builder_run=builder_run)
      for x in early_stage_list
  ]
  stage_objs = [
      self._GetStageInstance(*x, builder_run=builder_run)
      for x in stage_list
  ]

  # Build the image first before running the steps.
  with self._build_image_lock:
    self._RunStage(build_stages.BuildImageStage, board,
                   builder_run=builder_run, afdo_use=config.afdo_use)

  # Run UnitTestStage & UploadTestArtifactsStage in a separate pass before
  # any of the other parallel stages to prevent races with the image
  # construction in the ArchiveStage.
  # http://crbug.com/1000374
  self._RunParallelStages(early_stage_objs)

  parallel.RunParallelSteps([
      lambda: self._RunParallelStages(stage_objs + [archive_stage]),
      lambda: self._RunHWTests(builder_run, board),
      lambda: self._RunDebugSymbolStages(builder_run, board),
  ])

  # Move VMTests out of parallel execution due to high failure rate.
  # http://crbug/932644
  self._RunVMTests(builder_run, board)
def Perform(self):
  self._CheckDeployType()

  # If requested, just do the staging step.
  if self.options.staging_only:
    self._PrepareStagingDir()
    return 0

  # Check that the build matches the device.  Lacros-chrome skips this check
  # as it's currently board independent.  This means that it's possible to
  # deploy a build of lacros-chrome with a mismatched architecture.  We don't
  # try to prevent this developer foot-gun.
  if not self.options.lacros:
    self._CheckBoard()

  # Ensure that the target directory exists before running parallel steps.
  self._EnsureTargetDir()

  logging.info('Preparing device')
  steps = [
      self._GetDeviceInfo, self._CheckConnection,
      self._MountRootfsAsWritable, self._PrepareStagingDir
  ]

  # If this is a lacros build, we only want to restart ash-chrome if
  # necessary, which is done below.
  if not self.options.lacros:
    self._stopped_ui = True

  steps += ([self._KillLacrosChrome]
            if self.options.lacros else [self._KillAshChromeIfNeeded])

  ret = parallel.RunParallelSteps(steps, halt_on_error=True,
                                  return_values=True)
  self._CheckDeviceFreeSpace(ret[0])

  # If the root dir is not writable, try disabling rootfs verification.
  # (We always do this by default so that developers can write to
  # /etc/chrome_dev.conf and other directories in the rootfs.)
  if self._root_dir_is_still_readonly.is_set():
    if self.options.noremove_rootfs_verification:
      logging.warning('Skipping disable rootfs verification.')
    elif not self._DisableRootfsVerification():
      logging.warning('Failed to disable rootfs verification.')

    # If the target dir is still not writable (i.e. the user opted out or the
    # command failed), abort.
    if not self.device.IsDirWritable(self.options.target_dir):
      if self.options.startui and self._stopped_ui:
        logging.info('Restarting Chrome...')
        self.device.run('start ui')
      raise DeployFailure('Target location is not writable. Aborting.')

  if self.options.mount_dir is not None:
    self._MountTarget()

  if self.options.lacros:
    # Update /etc/chrome_dev.conf to include appropriate flags.
    restart_ui = False
    result = self.device.run(ENABLE_LACROS_VIA_CONF_COMMAND, shell=True)
    if result.stdout.strip() == MODIFIED_CONF_FILE:
      restart_ui = True
    result = self.device.run(_SET_LACROS_PATH_VIA_CONF_COMMAND % {
        'conf_file': _CONF_FILE,
        'lacros_path': self.options.target_dir,
        'modified_conf_file': MODIFIED_CONF_FILE
    }, shell=True)
    if result.stdout.strip() == MODIFIED_CONF_FILE:
      restart_ui = True

    if restart_ui:
      self._KillAshChromeIfNeeded()

  # Actually deploy Chrome to the device.
  self._Deploy()
  if self.options.deploy_test_binaries:
    self._DeployTestBinaries()