def test_invalid_cpus(self, get_config):
    get_config.return_value = {
        'bazel.additional-test-flags': [],
        'bazel.targets': [
            '//aa/bb/cc/...',
            '//aa/abc/...',
        ],
        'bazel.exclude-tags': [],
        'bazel.cpus': 0,  # 0 CPUs is not valid
        'bazel.mem': 8192,
        'bazel.max-executors': 1,
    }
    _, implementation = JobPlan.get_build_step_for_job(self._create_job_and_jobplan().id)
    assert implementation is None

    get_config.return_value = {
        'bazel.targets': [
            '//aa/bb/cc/...',
            '//aa/abc/...',
        ],
        'bazel.exclude-tags': [],
        'bazel.cpus': 2,  # Too many
        'bazel.mem': 8192,
        'bazel.max-executors': 1,
    }
    current_app.config['MAX_CPUS_PER_EXECUTOR'] = 1
    _, implementation = JobPlan.get_build_step_for_job(self._create_job_and_jobplan().id)
    assert implementation is None

def test_invalid_num_executors(self, get_config):
    get_config.return_value = {
        'bazel.additional-test-flags': [],
        'bazel.targets': [
            '//aa/bb/cc/...',
            '//aa/abc/...',
        ],
        'bazel.exclude-tags': [],
        'bazel.cpus': 1,
        'bazel.mem': 1234,
        'bazel.max-executors': 0,  # invalid
    }
    _, implementation = JobPlan.get_build_step_for_job(self._create_job_and_jobplan().id)
    assert implementation is None

    get_config.return_value = {
        'bazel.targets': [
            '//aa/bb/cc/...',
            '//aa/abc/...',
        ],
        'bazel.exclude-tags': [],
        'bazel.cpus': 1,
        'bazel.mem': 1234,
        'bazel.max-executors': 11,  # too high
    }
    _, implementation = JobPlan.get_build_step_for_job(self._create_job_and_jobplan().id)
    assert implementation is None

def test_invalid_mems(self, get_config):
    get_config.return_value = {
        'bazel.additional-test-flags': [],
        'bazel.targets': [
            '//aa/bb/cc/...',
            '//aa/abc/...',
        ],
        'bazel.exclude-tags': [],
        'bazel.cpus': 1,
        'bazel.mem': 1025,  # Too high
        'bazel.max-executors': 1,
    }
    current_app.config['MIN_MEM_MB_PER_EXECUTOR'] = 1
    current_app.config['MAX_MEM_MB_PER_EXECUTOR'] = 10
    _, implementation = JobPlan.get_build_step_for_job(self._create_job_and_jobplan().id)
    assert implementation is None

    get_config.return_value = {
        'bazel.targets': [
            '//aa/bb/cc/...',
            '//aa/abc/...',
        ],
        'bazel.exclude-tags': [],
        'bazel.cpus': 1,
        'bazel.mem': 1025,  # Too low
        'bazel.max-executors': 1,
    }
    current_app.config['MIN_MEM_MB_PER_EXECUTOR'] = 2000
    current_app.config['MAX_MEM_MB_PER_EXECUTOR'] = 3000
    _, implementation = JobPlan.get_build_step_for_job(self._create_job_and_jobplan().id)
    assert implementation is None

def get(self, step_id):
    jobstep = JobStep.query.options(
        joinedload('project', innerjoin=True),
    ).get(step_id)
    if jobstep is None:
        return '', 404

    jobplan = JobPlan.query.filter(
        JobPlan.job_id == jobstep.job_id,
    ).first()

    # determine if there's an expected snapshot outcome
    expected_image = SnapshotImage.query.filter(
        SnapshotImage.job_id == jobstep.job_id,
    ).first()

    current_image = None
    # we only send a current snapshot if we're not expecting to build
    # a new image
    if not expected_image:
        if jobplan:
            current_image = jobplan.snapshot_image
        if current_image is None and current_app.config['DEFAULT_SNAPSHOT']:
            current_image = {
                'id': current_app.config['DEFAULT_SNAPSHOT'],
            }

    context = self.serialize(jobstep)
    context['commands'] = self.serialize(list(jobstep.commands))
    context['snapshot'] = self.serialize(current_image)
    context['expectedSnapshot'] = self.serialize(expected_image)
    context['project'] = self.serialize(jobstep.project)
    context['job'] = self.serialize(jobstep.job)

    _, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)

    resource_limits = buildstep.get_resource_limits() if buildstep else {}
    if resource_limits:
        context['resourceLimits'] = resource_limits

    lxc_config = buildstep.get_lxc_config(jobstep) if buildstep else None
    if lxc_config:
        context["adapter"] = "lxc"
        context['lxcConfig'] = {
            'preLaunch': lxc_config.prelaunch,
            'postLaunch': lxc_config.postlaunch,
            's3Bucket': lxc_config.s3_bucket,
            'compression': lxc_config.compression,
            'release': lxc_config.release,
            'template': lxc_config.template,
            'mirror': lxc_config.mirror,
            'securityMirror': lxc_config.security_mirror,
        }

    debug_config = buildstep.debug_config if buildstep else {}
    if 'debugForceInfraFailure' in jobstep.data:
        debug_config['forceInfraFailure'] = jobstep.data['debugForceInfraFailure']
    if debug_config:
        context['debugConfig'] = self.serialize(debug_config)

    return self.respond(context, serialize=False)

def sync_artifact(artifact_id=None, **kwargs):
    """
    Downloads an artifact from jenkins.
    """
    artifact = Artifact.query.get(artifact_id)
    if artifact is None:
        return

    step = artifact.step

    if step.result == Result.aborted:
        return

    _, implementation = JobPlan.get_build_step_for_job(job_id=step.job_id)

    # TODO(dcramer): we eventually want to abstract the entirety of Jenkins
    # artifact syncing so that we pull files and then process them
    if artifact.file:
        try:
            implementation.get_artifact_manager(step).process(artifact)
        except UnrecoverableException:
            current_app.logger.exception(
                'Unrecoverable exception processing artifact %s: %s',
                artifact.step_id, artifact)
    else:
        try:
            implementation.fetch_artifact(artifact=artifact)
        except UnrecoverableException:
            current_app.logger.exception(
                'Unrecoverable exception fetching artifact %s: %s',
                artifact.step_id, artifact)

def post(self, build_id):
    build = Build.query.options(
        joinedload('project', innerjoin=True),
        joinedload('author'),
        joinedload('source').joinedload('revision'),
    ).get(build_id)
    if build is None:
        return '', 404

    if build.status == Status.finished:
        return '', 204

    cancelled = []

    # find any active/pending jobs
    for job in filter(lambda x: x.status != Status.finished, build.jobs):
        # TODO(dcramer): we make an assumption that there is a single step
        _, implementation = JobPlan.get_build_step_for_job(job_id=job.id)
        if not implementation:
            continue

        implementation.cancel(job=job)
        cancelled.append(job)

    if not cancelled:
        return '', 204

    build.status = Status.finished
    build.result = Result.aborted
    db.session.add(build)

    return self.respond(build)

def sync_job_phases(job, phases=None, implementation=None):
    if phases is None:
        phases = JobPhase.query.filter(JobPhase.job_id == job.id)

    if implementation is None:
        _, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

    for phase in phases:
        sync_phase(phase, implementation)

def test_autogenerated_commands_with_additional_test_flags(self, get_config):
    get_config.return_value = {
        'bazel.additional-test-flags': ['--test_env=testing=123', '--test_env=testing=123'],
        'bazel.targets': [
            '//foo/bar/baz/...',
            '//bar/bax/...',
        ],
        'bazel.exclude-tags': [],
        'bazel.cpus': 2,
        'bazel.mem': 1234,
        'bazel.max-executors': 3,
    }

    mock_vcs = mock.Mock(spec=Vcs)
    mock_vcs.get_buildstep_checkout_revision.return_value = 'git checkout master'
    mock_vcs.get_buildstep_checkout_parent_revision.return_value = 'git checkout master^'
    mock_vcs.get_buildstep_changed_files.return_value = 'git diff --name-only master^..master'

    job = self._create_job_and_jobplan()
    with mock.patch.object(job.project.repository, "get_vcs") as mock_get_vcs:
        mock_get_vcs.return_value = mock_vcs
        _, implementation = JobPlan.get_build_step_for_job(job.id)

    collect_tests_expected = """#!/bin/bash -eu
sudo apt-get install -y --force-yes bazel python >/dev/null 2>&1
"/var/changes/input/collect-targets" --output-user-root="/bazel/root/path" --target-patterns=//foo/bar/baz/... --target-patterns=//bar/bax/... --test-flags=--spawn_strategy=sandboxed --test-flags=--genrule_strategy=sandboxed --test-flags=--keep_going --test-flags=--test_env=testing=123 --jobs="4" --selective-testing-skip-list={} 2> /dev/null
""".strip().format(job.project.get_config_path())

    assert implementation.max_executors == 3
    assert implementation.artifacts == []
    assert implementation.artifact_suffix == '.bazel'
    assert implementation.resources['cpus'] == 2
    assert implementation.resources['mem'] == 1234

    assert len(implementation.commands) == 4
    assert implementation.commands[0].type == CommandType.setup
    assert implementation.commands[1].type == CommandType.setup
    assert implementation.commands[2].type == CommandType.setup
    assert implementation.commands[3].type == CommandType.collect_bazel_targets
    assert implementation.commands[3].script == collect_tests_expected

def test_autogenerated_commands_with_additional_test_flags_invalid(self, get_config):
    get_config.return_value = {
        'bazel.additional-test-flags': ['--keep_going'],  # not in whitelist
        'bazel.targets': [
            '//foo/bar/baz/...',
            '//bar/bax/...',
        ],
        'bazel.exclude-tags': [],
        'bazel.cpus': 2,
        'bazel.mem': 1234,
        'bazel.max-executors': 3,
    }
    _, implementation = JobPlan.get_build_step_for_job(self._create_job_and_jobplan().id)
    assert implementation is None

def create_job(job_id):
    """
    Kicks off a newly created job within a build;
    enqueued for each job within a new build.
    """
    job = Job.query.get(job_id)
    if not job:
        return

    if job.project.status == ProjectStatus.inactive:
        current_app.logger.warn('Project is not active: %s', job.project.slug)
        job.status = Status.finished
        job.result = Result.aborted
        db.session.add(job)
        db.session.flush()
        return

    # we might already be marked as finished for various reasons
    # (such as aborting the task)
    if job.status == Status.finished:
        return

    _, implementation = JobPlan.get_build_step_for_job(job_id=job.id)
    if implementation is None:
        # TODO(dcramer): record a FailureReason?
        job.status = Status.finished
        job.result = Result.aborted
        db.session.add(job)
        db.session.flush()
        current_app.logger.exception('No build plan set %s', job_id)
        return

    try:
        implementation.execute(job=job)
    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.infra_failed
        db.session.add(job)
        db.session.flush()
        current_app.logger.exception('Unrecoverable exception creating %s', job_id)
        return

    sync_job.delay(
        job_id=job.id.hex,
        task_id=job.id.hex,
        parent_task_id=job.build_id.hex,
    )

def test_autogenerated_commands_with_exclusions(self, get_config):
    get_config.return_value = {
        'bazel.additional-test-flags': [],
        'bazel.targets': [
            '//foo/bar/baz/...',
            '//bar/bax/...',
        ],
        'bazel.exclude-tags': [
            'flaky',
            'another_tag',
        ],
        'bazel.cpus': 2,
        'bazel.mem': 1234,
        'bazel.max-executors': 3,
    }

    mock_vcs = mock.Mock(spec=Vcs)
    mock_vcs.get_buildstep_checkout_revision.return_value = 'git checkout master'
    mock_vcs.get_buildstep_checkout_parent_revision.return_value = 'git checkout master^'
    mock_vcs.get_buildstep_changed_files.return_value = 'git diff --name-only master^..master'

    job = self._create_job_and_jobplan()
    with mock.patch.object(job.project.repository, "get_vcs") as mock_get_vcs:
        mock_get_vcs.return_value = mock_vcs
        _, implementation = JobPlan.get_build_step_for_job(job.id)

    collect_tests_expected = """#!/bin/bash -eu
sudo apt-get install -y --force-yes bazel python >/dev/null 2>&1
"/var/changes/input/collect-targets" --output-user-root="/bazel/root/path" --target-patterns=//foo/bar/baz/... --target-patterns=//bar/bax/... --exclude-tags=flaky --exclude-tags=another_tag --test-flags=--spawn_strategy=sandboxed --test-flags=--genrule_strategy=sandboxed --test-flags=--keep_going --jobs="4" --selective-testing-skip-list={} 2> /dev/null
""".strip().format(job.project.get_config_path())

    assert implementation.max_executors == 3
    assert implementation.artifacts == []
    assert implementation.artifact_suffix == '.bazel'
    assert implementation.resources['cpus'] == 2
    assert implementation.resources['mem'] == 1234

    assert len(implementation.commands) == 4
    assert implementation.commands[0].type == CommandType.setup
    assert implementation.commands[1].type == CommandType.setup
    assert implementation.commands[2].type == CommandType.setup
    assert implementation.commands[3].type == CommandType.collect_bazel_targets
    assert implementation.commands[3].script == collect_tests_expected

def expand_command(self, command, expander, data):
    jobstep = command.jobstep
    phase_name = data.get('phase')
    if not phase_name:
        phase_name = expander.default_phase_name()

    new_jobphase = JobPhase(
        job_id=jobstep.job_id,
        project_id=jobstep.project_id,
        label=phase_name,
        status=Status.queued,
    )
    db.session.add(new_jobphase)

    _, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)

    results = []
    for future_jobstep in expander.expand(
            job=jobstep.job,
            max_executors=jobstep.data['max_executors'],
            test_stats_from=buildstep.get_test_stats_from()):
        new_jobstep = buildstep.create_expanded_jobstep(
            jobstep, new_jobphase, future_jobstep)
        results.append(new_jobstep)

    # If there are no tests to run, the phase is done.
    if len(results) == 0:
        new_jobphase.status = Status.finished
        new_jobphase.result = Result.passed
        db.session.add(new_jobphase)

    db.session.flush()

    for new_jobstep in results:
        sync_job_step.delay_if_needed(
            step_id=new_jobstep.id.hex,
            task_id=new_jobstep.id.hex,
            parent_task_id=new_jobphase.job.id.hex,
        )

    return results

def process_row(agg, jobstep):
    status = jobstep.status.name
    current = agg.get(status) or default.copy()
    current['count'] += 1

    if args.check_resources:
        if jobstep.job_id not in buildstep_for_job_id:
            buildstep_for_job_id[jobstep.job_id] = JobPlan.get_build_step_for_job(jobstep.job_id)[1]
        buildstep = buildstep_for_job_id[jobstep.job_id]
        limits = buildstep.get_resource_limits()
        req_cpus = limits.get('cpus', DEFAULT_CPUS)
        req_mem = limits.get('memory', DEFAULT_MEMORY_MB)
        current['cpus'] += req_cpus
        current['mem'] += req_mem

    # Track the oldest jobstep we've seen.
    if current['created'] is None or jobstep.date_created < current['created']:
        current['created'] = jobstep.date_created
        current['jobstep_id'] = jobstep.id

    agg[status] = current

def process(self, fp, artifact):
    try:
        phase_config = json.load(fp)
    except ValueError:
        uri = build_web_uri('/find_build/{0}/'.format(self.step.job.build_id.hex))
        self.logger.warning('Failed to parse json; (step=%s, build=%s)',
                            self.step.id.hex, uri, exc_info=True)
        self.report_malformed()
    else:
        _, implementation = JobPlan.get_build_step_for_job(job_id=self.step.job_id)
        try:
            implementation.expand_jobs(self.step, phase_config)
        except ArtifactParseError:
            uri = build_web_uri('/find_build/{0}/'.format(self.step.job.build_id.hex))
            self.logger.warning('malformed %s artifact (step=%s, build=%s)',
                                self.FILENAMES[0], self.step.id.hex, uri, exc_info=True)
            self.report_malformed()
        except Exception:
            uri = build_web_uri('/find_build/{0}/'.format(self.step.job.build_id.hex))
            self.logger.warning('expand_jobs failed (step=%s, build=%s)',
                                self.step.id.hex, uri, exc_info=True)
            self.step.result = Result.infra_failed
            db.session.add(self.step)
            db.session.commit()

def get(self):
    """
    New GET method that returns a priority sorted list of possible jobsteps
    to allocate. The scheduler can then decide which ones it can actually
    allocate and makes a POST request to mark these as such with Changes.

    Args (in the form of a query string):
        cluster (Optional[str]): The cluster to look for jobsteps in.
        limit (int (default 200)): Maximum number of jobsteps to return.
    """
    args = self.get_parser.parse_args()
    cluster = args.cluster
    limit = args.limit

    with statsreporter.stats().timer('jobstep_allocate_get'):
        available_allocations = self.find_next_jobsteps(limit, cluster)

        jobstep_results = []
        for jobstep in available_allocations:
            jobplan, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)
            assert jobplan and buildstep

            limits = buildstep.get_resource_limits()
            req_cpus = limits.get('cpus', 4)
            req_mem = limits.get('memory', 8 * 1024)
            allocation_cmd = buildstep.get_allocation_command(jobstep)

            jobstep_data = self.serialize(jobstep)
            jobstep_data['project'] = self.serialize(jobstep.project)
            jobstep_data['resources'] = {
                'cpus': req_cpus,
                'mem': req_mem,
            }
            jobstep_data['cmd'] = allocation_cmd
            jobstep_results.append(jobstep_data)

    return self.respond({'jobsteps': jobstep_results})

def get(self):
    """
    GET method that returns a priority sorted list of possible jobsteps
    to allocate. The scheduler can then decide which ones it can actually
    allocate and makes a POST request to mark these as such with Changes.

    Args (in the form of a query string):
        cluster (Optional[str]): The cluster to look for jobsteps in.
        limit (int (default 200)): Maximum number of jobsteps to return.
    """
    args = self.get_parser.parse_args()
    cluster = args.cluster
    limit = args.limit

    with statsreporter.stats().timer('jobstep_allocate_get'):
        available_allocations = self.find_next_jobsteps(limit, cluster)
        jobstep_results = self.serialize(available_allocations)

        buildstep_for_job_id = {}
        for jobstep, jobstep_data in zip(available_allocations, jobstep_results):
            if jobstep.job_id not in buildstep_for_job_id:
                buildstep_for_job_id[jobstep.job_id] = JobPlan.get_build_step_for_job(jobstep.job_id)[1]
            buildstep = buildstep_for_job_id[jobstep.job_id]

            limits = buildstep.get_resource_limits()
            req_cpus = limits.get('cpus', 4)
            req_mem = limits.get('memory', 8 * 1024)
            allocation_cmd = buildstep.get_allocation_command(jobstep)

            jobstep_data['project'] = jobstep.project
            jobstep_data['resources'] = {
                'cpus': req_cpus,
                'mem': req_mem,
            }
            jobstep_data['cmd'] = allocation_cmd

    return self.respond({'jobsteps': jobstep_results})

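# A scheduler-side sketch of the two-phase allocation handshake implied by the
# GET handler above and the POST allocation handler later in this section:
# fetch candidates, bin-pack them against a resource offer, then POST the
# chosen jobstep_ids back to Changes. This is illustrative only; the host,
# endpoint path, and the presence of an 'id' field in the serialized jobstep
# are assumptions, not confirmed API details.
import requests


def allocate_from_offer(base_url, offer_cpus, offer_mem, cluster=None):
    params = {'limit': 200}
    if cluster:
        params['cluster'] = cluster
    # Candidates come back priority sorted, each with 'resources' and 'cmd'.
    candidates = requests.get(
        base_url + '/jobsteps/allocate/', params=params).json()['jobsteps']

    chosen = []
    for jobstep in candidates:
        req = jobstep['resources']
        if offer_cpus >= req['cpus'] and offer_mem >= req['mem']:
            offer_cpus -= req['cpus']
            offer_mem -= req['mem']
            chosen.append(jobstep)

    if chosen:
        # Mirrors the `jobstep_ids` branch of the POST handler below.
        requests.post(base_url + '/jobsteps/allocate/',
                      json={'jobstep_ids': [js['id'] for js in chosen],
                            'cluster': cluster})
    return chosen
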
def _sync_artifacts_for_jobstep(step):
    # only generate the sync_artifact tasks for this step once
    if Task.query.filter(
        Task.parent_id == step.id,
        Task.task_name == 'sync_artifact',
    ).first():
        return

    artifacts = Artifact.query.filter(Artifact.step_id == step.id).all()

    _, buildstep = JobPlan.get_build_step_for_job(job_id=step.job_id)
    prefer_artifactstore = buildstep.prefer_artifactstore()
    artifact_manager = buildstep.get_artifact_manager(step)
    to_sync = _get_artifacts_to_sync(artifacts, artifact_manager, prefer_artifactstore)

    # buildstep may want to check for e.g. required artifacts
    buildstep.verify_final_artifacts(step, to_sync)

    for artifact in to_sync:
        sync_artifact.delay_if_needed(
            artifact_id=artifact.id.hex,
            task_id=artifact.id.hex,
            parent_task_id=step.id.hex,
        )

def create_job_plan(self, job, plan, snapshot_id=None):
    jobplan = JobPlan.build_jobplan(plan, job, snapshot_id=snapshot_id)
    db.session.add(jobplan)
    db.session.commit()
    return jobplan

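# Hypothetical usage of the fixture above (a sketch, mirroring the setup
# pattern in test_autogenerated_commands later in this section). The test name
# and the final assertion are illustrative; whether a concrete implementation
# is returned depends on how the plan's steps and options are configured.
def test_job_plan_resolves_build_step(self):
    project = self.create_project()
    plan = self.create_plan(project)
    build = self.create_build(project)
    job = self.create_job(build)
    self.create_job_plan(job, plan)

    # Returns (JobPlan, buildstep implementation); implementation may be None
    # when the plan has no valid step configuration.
    jobplan, implementation = JobPlan.get_build_step_for_job(job.id)
    assert jobplan is not None
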
def execute_build(build, snapshot_id, no_snapshot):
    if no_snapshot:
        assert snapshot_id is None, 'Cannot specify snapshot with no_snapshot option'
    # TODO(dcramer): most of this should be abstracted into sync_build as if it
    # were a "im on step 0, create step 1"
    project = build.project

    # We choose a snapshot before creating jobplans. This is so that different
    # jobplans won't end up using different snapshots in a build.
    if snapshot_id is None and not no_snapshot:
        snapshot = Snapshot.get_current(project.id)
        if snapshot:
            snapshot_id = snapshot.id

    plans = get_build_plans(project)
    options = ItemOptionsHelper.get_options([p.id for p in plans], ['snapshot.require'])

    jobs = []
    for plan in get_build_plans(project):
        if (options[plan.id].get('snapshot.require', '0') == '1' and
                not no_snapshot and
                SnapshotImage.get(plan, snapshot_id) is None):
            logging.warning('Skipping plan %r (%r) because no snapshot exists yet',
                            plan.label, project.slug)
            continue

        job = Job(
            build=build,
            build_id=build.id,
            project=project,
            project_id=project.id,
            source=build.source,
            source_id=build.source_id,
            status=build.status,
            label=plan.label,
        )
        db.session.add(job)

        jobplan = JobPlan.build_jobplan(plan, job, snapshot_id=snapshot_id)
        db.session.add(jobplan)

        jobs.append(job)

    db.session.commit()

    for job in jobs:
        create_job.delay(
            job_id=job.id.hex,
            task_id=job.id.hex,
            parent_task_id=job.build_id.hex,
        )

    db.session.commit()

    sync_build.delay(
        build_id=build.id.hex,
        task_id=build.id.hex,
    )

    return build

def test_autogenerated_commands(self, get_config):
    get_config.return_value = {
        'bazel.additional-test-flags': [],
        'bazel.targets': [
            '//aa/bb/cc/...',
            '//aa/abc/...',
        ],
        'bazel.dependencies': {
            'encap': [
                'package1',
                'pkg-2',
            ]
        },
        'bazel.exclude-tags': [],
        'bazel.cpus': 4,  # Default
        'bazel.mem': 8192,  # Default
        'bazel.max-executors': 1,  # Default
    }

    mock_vcs = mock.Mock(spec=Vcs)
    mock_vcs.get_buildstep_checkout_revision.return_value = 'git checkout master'
    mock_vcs.get_buildstep_checkout_parent_revision.return_value = 'git checkout master^'
    mock_vcs.get_buildstep_changed_files.return_value = 'git diff --name-only master^..master'

    job = self._create_job_and_jobplan()
    with mock.patch.object(job.project.repository, "get_vcs") as mock_get_vcs:
        mock_get_vcs.return_value = mock_vcs
        _, implementation = JobPlan.get_build_step_for_job(job.id)

    bazel_setup_expected = """#!/bin/bash -eux
sudo apt-get install -y --force-yes bazel
""".strip()

    sync_encap_expected = """
sudo mkdir -p /usr/local/encap/
sudo /usr/bin/rsync -a --delete rsync://example.com/encap/package1 /usr/local/encap/
sudo /usr/bin/rsync -a --delete rsync://example.com/encap/pkg-2 /usr/local/encap/
""".strip()

    collect_targets_expected = """#!/bin/bash -eu
sudo apt-get install -y --force-yes bazel python >/dev/null 2>&1
"/var/changes/input/collect-targets" --output-user-root="/bazel/root/path" --target-patterns=//aa/bb/cc/... --target-patterns=//aa/abc/... --test-flags=--spawn_strategy=sandboxed --test-flags=--genrule_strategy=sandboxed --test-flags=--keep_going --jobs="8" --selective-testing-skip-list={} 2> /dev/null
""".strip().format(job.project.get_config_path())

    extra_setup_expected = """#!/bin/bash -eux
exit 0
""".strip()

    assert len(implementation.commands) == 4
    assert implementation.max_executors == 1
    assert implementation.artifacts == []
    assert implementation.artifact_suffix == '.bazel'

    assert implementation.commands[0].type == CommandType.setup
    assert implementation.commands[0].script == bazel_setup_expected

    assert implementation.commands[1].type == CommandType.setup
    assert implementation.commands[1].script == sync_encap_expected

    assert implementation.commands[2].type == CommandType.setup
    assert implementation.commands[2].script == extra_setup_expected

    assert implementation.commands[3].type == CommandType.collect_bazel_targets
    assert implementation.commands[3].script == collect_targets_expected
    assert implementation.commands[3].env['VCS_CHECKOUT_TARGET_REVISION_CMD'] == 'git checkout master'
    assert implementation.commands[3].env['VCS_CHECKOUT_PARENT_REVISION_CMD'] == 'git checkout master^'
    assert implementation.commands[3].env['VCS_GET_CHANGED_FILES_CMD'] == 'git diff --name-only master^..master'

def post(self, project_id):
    """Initiates a new snapshot for this project."""
    project = Project.get(project_id)
    if not project:
        return '', 404

    args = self.post_parser.parse_args()

    repository = project.repository
    try:
        revision = identify_revision(repository, args.sha)
    except MissingRevision:
        # if the default fails, we absolutely can't continue and the
        # client should send a valid revision
        return error("Unable to find a matching revision.")

    if revision:
        sha = revision.sha
    else:
        sha = args.sha

    plan_list = get_snapshottable_plans(project)
    if not plan_list:
        return error("No snapshottable plans associated with project.")

    source, _ = get_or_create(Source, where={
        'repository': repository,
        'revision_sha': sha,
        'patch_id': None,
    })

    build = Build(
        source_id=source.id,
        source=source,
        project_id=project.id,
        project=project,
        label='Create Snapshot',
        status=Status.queued,
        cause=Cause.snapshot,
        target=sha[:12],
        tags=['snapshot'],
        # Snapshot builds are often part of the solution to queueing, so we make them
        # high priority to schedule them sooner.
        priority=BuildPriority.high,
    )
    db.session.add(build)

    # TODO(dcramer): this needs to update with the build result
    snapshot = Snapshot(
        project_id=project.id,
        source_id=source.id,
        build_id=build.id,
        status=SnapshotStatus.pending,
    )
    db.session.add(snapshot)

    jobs = []
    for plan in plan_list:
        job = Job(
            build=build,
            build_id=build.id,
            project=project,
            project_id=project.id,
            source=build.source,
            source_id=build.source_id,
            status=build.status,
            label='Create Snapshot: %s' % (plan.label,),
        )
        db.session.add(job)

        jobplan = JobPlan.build_jobplan(plan, job)
        db.session.add(jobplan)

        image = SnapshotImage(
            job=job,
            snapshot=snapshot,
            plan=plan,
        )
        db.session.add(image)

        jobs.append(job)

    db.session.commit()

    for job in jobs:
        create_job.delay(
            job_id=job.id.hex,
            task_id=job.id.hex,
            parent_task_id=job.build_id.hex,
        )

    db.session.commit()

    sync_build.delay(
        build_id=build.id.hex,
        task_id=build.id.hex,
    )

    return self.respond(snapshot)

def job(build, change=None, **kwargs):
    kwargs.setdefault('project', build.project)
    kwargs.setdefault('label', get_sentences(1)[0][:128])
    kwargs.setdefault('status', Status.finished)
    kwargs.setdefault('result', Result.passed)
    kwargs.setdefault('duration', random.randint(10000, 100000))
    kwargs['source'] = build.source
    kwargs['source_id'] = kwargs['source'].id
    kwargs['project_id'] = kwargs['project'].id
    kwargs['build_id'] = build.id
    if change:
        kwargs['change_id'] = change.id

    job = Job(
        build=build,
        change=change,
        **kwargs
    )
    db.session.add(job)

    node, created = get_or_create(Node, where={
        'label': get_sentences(1)[0][:32],
    })

    if created:
        cluster, _ = get_or_create(Cluster, where={
            'label': get_sentences(1)[0][:32],
        })
        clusternode = ClusterNode(cluster=cluster, node=node)
        db.session.add(clusternode)

    jobplan = JobPlan.build_jobplan(plan(build.project), job)
    db.session.add(jobplan)

    phase1_setup = JobPhase(
        project=job.project,
        job=job,
        date_started=job.date_started,
        date_finished=job.date_finished,
        status=Status.finished,
        result=Result.passed,
        label='Setup',
    )
    db.session.add(phase1_setup)

    phase1_compile = JobPhase(
        project=job.project,
        job=job,
        date_started=job.date_started,
        date_finished=job.date_finished,
        status=Status.finished,
        result=Result.passed,
        label='Compile',
    )
    db.session.add(phase1_compile)

    phase1_test = JobPhase(
        project=job.project,
        job=job,
        date_started=job.date_started,
        date_finished=job.date_finished,
        status=kwargs['status'],
        result=kwargs['result'],
        label='Test',
    )
    db.session.add(phase1_test)

    step = JobStep(
        project=job.project,
        job=job,
        phase=phase1_setup,
        status=phase1_setup.status,
        result=phase1_setup.result,
        label='Setup',
        node=node,
    )
    db.session.add(step)

    command = Command(
        jobstep=step,
        script="echo 1",
        label="echo 1",
    )
    db.session.add(command)

    step = JobStep(
        project=job.project,
        job=job,
        phase=phase1_compile,
        status=phase1_compile.status,
        result=phase1_compile.result,
        label='Compile',
        node=node,
    )
    db.session.add(step)

    command = Command(
        jobstep=step,
        script="echo 2",
        label="echo 2",
    )
    db.session.add(command)

    step = JobStep(
        project=job.project,
        job=job,
        phase=phase1_test,
        status=phase1_test.status,
        result=phase1_test.result,
        label=TEST_STEP_LABELS.next(),
        node=node,
    )
    db.session.add(step)

    command = Command(
        jobstep=step,
        script="echo 3",
        label="echo 3",
    )
    db.session.add(command)

    step = JobStep(
        project=job.project,
        job=job,
        phase=phase1_test,
        status=phase1_test.status,
        result=phase1_test.result,
        label=TEST_STEP_LABELS.next(),
        node=node,
    )
    db.session.add(step)

    command = Command(
        jobstep=step,
        script="echo 4",
        label="echo 4",
    )
    db.session.add(command)

    if phase1_test.result == Result.failed:
        db.session.add(FailureReason(
            reason='test_failures',
            build_id=build.id,
            job_id=job.id,
            step_id=step.id,
            project_id=job.project_id,
        ))

    return job

def post(self):
    args = json.loads(request.data)

    # TODO(nate): get rid of old allocation code once scheduler is updated to use this
    if args.get('jobstep_ids'):
        return self.new_post(args)

    try:
        resources = args['resources']
    except KeyError:
        return error('Missing resources attribute')

    cluster = args.get('cluster')

    # cpu and mem as 0 are treated by changes-client
    # as having no enforced limit
    total_cpus = int(resources.get('cpus', 0))
    total_mem = int(resources.get('mem', 0))  # MB

    with statsreporter.stats().timer('jobstep_allocate'):
        try:
            lock_key = 'jobstep:allocate'
            if cluster:
                lock_key = lock_key + ':' + cluster
            with redis.lock(lock_key, nowait=True):
                available_allocations = self.find_next_jobsteps(limit=10, cluster=cluster)
                to_allocate = []

                for jobstep in available_allocations:
                    jobplan, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)
                    assert jobplan and buildstep

                    limits = buildstep.get_resource_limits()
                    req_cpus = limits.get('cpus', 4)
                    req_mem = limits.get('memory', 8 * 1024)

                    if total_cpus >= req_cpus and total_mem >= req_mem:
                        total_cpus -= req_cpus
                        total_mem -= req_mem

                        allocation_cmd = buildstep.get_allocation_command(jobstep)

                        jobstep.status = Status.allocated
                        db.session.add(jobstep)

                        # We keep the data from the BuildStep to be sure we're using the same resource values.
                        to_allocate.append((jobstep, _AllocData(
                            cpus=req_cpus,
                            memory=req_mem,
                            command=allocation_cmd,
                        )))

                        # The JobSteps returned are pending_allocation, and the initial state for a Mesos JobStep is
                        # pending_allocation, so we can determine how long it was pending by how long ago it was
                        # created.
                        pending_seconds = (datetime.utcnow() - jobstep.date_created).total_seconds()
                        statsreporter.stats().log_timing('duration_pending_allocation', pending_seconds * 1000)
                    else:
                        logging.info('Not allocating %s due to lack of offered resources', jobstep.id.hex)

                if not to_allocate:
                    # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
                    return self.respond([])

                db.session.flush()
        except UnableToGetLock:
            return error('Another allocation is in progress', http_code=503)

    context = []

    for jobstep, alloc_data in to_allocate:
        try:
            jobstep_data = self.serialize(jobstep)
            jobstep_data['project'] = self.serialize(jobstep.project)
            jobstep_data['resources'] = {
                'cpus': alloc_data.cpus,
                'mem': alloc_data.memory,
            }
            jobstep_data['cmd'] = alloc_data.command
        except Exception:
            jobstep.status = Status.finished
            jobstep.result = Result.infra_failed
            db.session.add(jobstep)
            db.session.flush()

            logging.exception(
                'Exception occurred while allocating job step %s for project %s',
                jobstep.id.hex, jobstep.project.slug)
        else:
            context.append(jobstep_data)

    return self.respond(context)

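# Example request body for the legacy POST allocation handler above. Values
# are illustrative, not from the source; "cluster" is optional. The scheduler
# offers total resources and Changes greedily allocates queued jobsteps
# against that offer.
EXAMPLE_ALLOCATE_REQUEST = {
    'resources': {'cpus': 8, 'mem': 16384},  # mem in MB, per the handler above
    'cluster': 'primary',
}
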
def sync_job_step(step_id):
    """
    Polls a build for updates. May have sync_artifact children.
    """
    step = JobStep.query.get(step_id)
    if not step:
        return

    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=step.job_id)

    # only synchronize if upstream hasn't suggested we're finished
    if step.status != Status.finished:
        implementation.update_step(step=step)

    db.session.flush()

    _sync_from_artifact_store(step)

    if step.status == Status.finished:
        # there is a small race condition where step.status got changed right after
        # the first call to _sync_from_artifact_store
        _sync_from_artifact_store(step)
        _sync_artifacts_for_jobstep(step)

    is_finished = (step.status == Status.finished and
                   # make sure all child tasks (like sync_artifact) have also finished
                   sync_job_step.verify_all_children() == Status.finished)

    if not is_finished:
        default_timeout = current_app.config['DEFAULT_JOB_TIMEOUT_MIN']
        if has_timed_out(step, jobplan, default_timeout=default_timeout):
            old_status = step.status
            step.data['timed_out'] = True
            implementation.cancel_step(step=step)

            # Not all implementations can actually cancel, but it's dead to us as of now
            # so we mark it as finished.
            step.status = Status.finished
            step.date_finished = datetime.utcnow()

            # Implementations default to marking canceled steps as aborted,
            # but we're not canceling on good terms (it should be done by now)
            # so we consider it a failure here.
            #
            # We check whether the step was marked as in_progress to make a best
            # guess as to whether this is an infrastructure failure, or the
            # repository under test is just taking too long. This won't be 100%
            # reliable, but is probably good enough.
            if old_status == Status.in_progress:
                step.result = Result.failed
            else:
                step.result = Result.infra_failed
            db.session.add(step)

            job = step.job
            try_create(FailureReason, {
                'step_id': step.id,
                'job_id': job.id,
                'build_id': job.build_id,
                'project_id': job.project_id,
                'reason': 'timeout',
            })

            db.session.flush()
            statsreporter.stats().incr('job_step_timed_out')

            # If we timeout something that isn't in progress, that's our fault, and we should know.
            if old_status != Status.in_progress:
                current_app.logger.warning(
                    "Timed out jobstep that wasn't in progress: %s (was %s)",
                    step.id, old_status)

        raise sync_job_step.NotFinished

    # Close the ArtifactStore bucket used by jenkins, if it exists
    bucket_name = step.data.get('jenkins_bucket_name')
    if bucket_name:
        try:
            ArtifactStoreClient(current_app.config['ARTIFACTS_SERVER']).close_bucket(bucket_name)
        except Exception:
            # Closing buckets is not strictly necessary in artifactstore
            pass

    # Ignore any 'failures' if the build did not finish properly.
    # NOTE(josiah): we might want to include "unknown" and "skipped" here as
    # well, or have some named condition like "not meaningful_result(step.result)".
    if step.result in (Result.aborted, Result.infra_failed):
        _report_jobstep_result(step)
        return

    # Check for FailureReason objects generated by child jobs
    failure_result = _result_from_failure_reasons(step)
    if failure_result and failure_result != step.result:
        step.result = failure_result
        db.session.add(step)
        db.session.commit()
        if failure_result == Result.infra_failed:
            _report_jobstep_result(step)
            return

    try:
        record_coverage_stats(step)
    except Exception:
        current_app.logger.exception('Failed recording coverage stats for step %s', step.id)

    missing_tests = is_missing_tests(step, jobplan)

    try_create(ItemStat, where={
        'item_id': step.id,
        'name': 'tests_missing',
        'value': int(missing_tests),
    })

    if missing_tests:
        if step.result != Result.failed:
            step.result = Result.failed
            db.session.add(step)

        try_create(FailureReason, {
            'step_id': step.id,
            'job_id': step.job_id,
            'build_id': step.job.build_id,
            'project_id': step.project_id,
            'reason': 'missing_tests',
        })
        db.session.commit()

    db.session.flush()

    if has_test_failures(step):
        if step.result != Result.failed:
            step.result = Result.failed
            db.session.add(step)

        try_create(FailureReason, {
            'step_id': step.id,
            'job_id': step.job_id,
            'build_id': step.job.build_id,
            'project_id': step.project_id,
            'reason': 'test_failures',
        })
        db.session.commit()

    if has_missing_targets(step):
        if step.result != Result.failed:
            step.result = Result.failed
            db.session.add(step)

        BazelTarget.query.filter(
            BazelTarget.step_id == step.id,
            BazelTarget.status == Status.in_progress,
        ).update({
            'status': Status.finished,
            'result': Result.aborted,
        })

        try_create(FailureReason, {
            'step_id': step.id,
            'job_id': step.job_id,
            'build_id': step.job.build_id,
            'project_id': step.project_id,
            'reason': 'missing_targets',
        })
        db.session.commit()

    _report_jobstep_result(step)

def test_autogenerated_commands(self, get_config):
    get_config.return_value = {
        'bazel.targets': [
            '//aa/bb/cc/...',
            '//aa/abc/...',
        ],
        'bazel.dependencies': {
            'encap': [
                'package1',
                'pkg-2',
            ]
        }
    }
    current_app.config['APT_SPEC'] = 'deb http://example.com/debian distribution component1'
    current_app.config['ENCAP_RSYNC_URL'] = 'rsync://example.com/encap/'

    project = self.create_project()
    plan = self.create_plan(project)
    option = self.create_option(
        item_id=plan.id,
        name='bazel.autogenerate',
        value='1',
    )
    build = self.create_build(project)
    job = self.create_job(build)
    jobplan = self.create_job_plan(job, plan)

    _, implementation = JobPlan.get_build_step_for_job(job.id)

    bazel_setup_expected = """#!/bin/bash -eux
echo "deb http://example.com/debian distribution component1" | sudo tee /etc/apt/sources.list.d/bazel-changes-autogen.list
sudo apt-get update || true
sudo apt-get install -y --force-yes bazel drte-v1 gcc unzip zip
""".strip()

    sync_encap_expected = """
sudo mkdir -p /usr/local/encap/
sudo rsync -a --delete rsync://example.com/encap/package1 /usr/local/encap/
sudo rsync -a --delete rsync://example.com/encap/pkg-2 /usr/local/encap/
""".strip()

    collect_tests_expected = """#!/bin/bash -eu
echo "deb http://example.com/debian distribution component1" | sudo tee /etc/apt/sources.list.d/bazel-changes-autogen.list > /dev/null 2>&1
(sudo apt-get update || true) > /dev/null 2>&1
sudo apt-get install -y --force-yes bazel drte-v1 gcc unzip zip python > /dev/null 2>&1
(bazel query 'tests(//aa/bb/cc/... + //aa/abc/...)' | python -c "import sys
import json
targets = sys.stdin.read().splitlines()
out = {
    'cmd': 'bazel test {test_names}',
    'tests': targets,
}
json.dump(out, sys.stdout)
") 2> /dev/null
""".strip()

    assert len(implementation.commands) == 3

    assert implementation.commands[0].type == CommandType.setup
    assert implementation.commands[0].script == bazel_setup_expected

    assert implementation.commands[1].type == CommandType.setup
    assert implementation.commands[1].script == sync_encap_expected

    assert implementation.commands[2].type == CommandType.collect_tests
    assert implementation.commands[2].script == collect_tests_expected

def sync_job(job_id):
    """
    Updates jobphase and job statuses based on the status of the
    constituent jobsteps.
    """
    job = Job.query.get(job_id)
    if not job:
        return

    if job.status == Status.finished:
        return

    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

    try:
        implementation.update(job=job)
    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.infra_failed
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    all_phases = list(job.phases)

    # propagate changes to any phases as they live outside of the
    # normalize synchronization routines
    sync_job_phases(job, all_phases, implementation)

    is_finished = sync_job.verify_all_children() == Status.finished
    if any(p.status != Status.finished for p in all_phases):
        is_finished = False

    job.date_started = safe_agg(min, (j.date_started for j in all_phases if j.date_started))

    if is_finished:
        job.date_finished = safe_agg(max, (j.date_finished for j in all_phases if j.date_finished))
    else:
        job.date_finished = None

    if job.date_started and job.date_finished:
        job.duration = int((job.date_finished - job.date_started).total_seconds() * 1000)
    else:
        job.duration = None

    # if any phases are marked as failing, fail the build
    if any(j.result is Result.failed for j in all_phases):
        job.result = Result.failed
    # If any test cases were marked as failing, fail the build.
    # The exception is if the only failing test case occurred in a JobStep that
    # had an infra failure. In this case we can't trust the test case result as
    # being meaningful and so we ignore these.
    elif TestCase.query.join(JobStep, JobStep.id == TestCase.step_id).filter(
            TestCase.result == Result.failed,
            TestCase.job_id == job.id,
            JobStep.result != Result.infra_failed).first():
        job.result = Result.failed
    # if we've finished all phases, use the best result available
    elif is_finished:
        # Sets the final job result.
        implementation.validate(job=job)
    else:
        job.result = Result.unknown

    if is_finished:
        job.status = Status.finished
    else:
        # ensure we dont set the status to finished unless it actually is
        new_status = aggregate_status((j.status for j in all_phases))
        if new_status != Status.finished:
            job.status = new_status
        elif job.status == Status.finished:
            job.status = Status.in_progress
            current_app.logger.exception('Job incorrectly marked as finished: %s', job.id)

    if db.session.is_modified(job):
        job.date_modified = datetime.utcnow()
        db.session.add(job)
        db.session.commit()

    if not is_finished:
        raise sync_job.NotFinished

    try:
        aggregate_job_stat(job, 'test_count')
        aggregate_job_stat(job, 'test_duration')
        aggregate_job_stat(job, 'test_failures')
        aggregate_job_stat(job, 'test_rerun_count')
        aggregate_job_stat(job, 'tests_missing')
        aggregate_job_stat(job, 'lines_covered')
        aggregate_job_stat(job, 'lines_uncovered')
        aggregate_job_stat(job, 'diff_lines_covered')
        aggregate_job_stat(job, 'diff_lines_uncovered')
    except Exception:
        current_app.logger.exception('Failed recording aggregate stats for job %s', job.id)

    fire_signal.delay(
        signal='job.finished',
        kwargs={'job_id': job.id.hex},
    )

    if jobplan:
        queue.delay('update_project_plan_stats', kwargs={
            'project_id': job.project_id.hex,
            'plan_id': jobplan.plan_id.hex,
        }, countdown=1)

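# sync_job above leans on small helpers whose definitions sit elsewhere in the
# codebase. A minimal sketch of plausible semantics for one of them, assuming
# safe_agg simply guards an aggregate like min/max against an empty sequence
# (the real implementation may differ):
def safe_agg(func, sequence, default=None):
    # Apply func (e.g. min or max) to the non-None items, or return default
    # when there is nothing to aggregate.
    items = [item for item in sequence if item is not None]
    return func(items) if items else default
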