def sync_repo(repo_id, continuous=True):
    repo = Repository.query.get(repo_id)
    if not repo:
        logger.error('Repository %s not found', repo_id)
        return

    vcs = repo.get_vcs()
    if vcs is None:
        logger.warning('Repository %s has no VCS backend set', repo.id)
        return

    if repo.status != RepositoryStatus.active:
        logger.info('Repository %s is not active', repo.id)
        return

    Repository.query.filter(
        Repository.id == repo.id,
    ).update({
        'last_update_attempt': datetime.utcnow(),
    }, synchronize_session=False)
    db.session.commit()

    if vcs.exists():
        vcs.update()
    else:
        vcs.clone()

    # TODO(dcramer): this doesn't scrape everything, and really we wouldn't
    # want to do this all in a single job, so we should split this into a
    # backfill task
    # TODO(dcramer): this doesn't collect commits in non-default branches
    might_have_more = True
    parent = None
    while might_have_more:
        might_have_more = False
        for commit in vcs.log(parent=parent):
            revision, created = commit.save(repo)
            db.session.commit()
            if not created:
                break

            might_have_more = True
            parent = commit.id

            fire_signal.delay(
                signal='revision.created',
                kwargs={'repository_id': repo.id.hex,
                        'revision_sha': revision.sha},
            )

    Repository.query.filter(
        Repository.id == repo.id,
    ).update({
        'last_update': datetime.utcnow(),
    }, synchronize_session=False)
    db.session.commit()

    if continuous:
        raise sync_repo.NotFinished
def sync_repo(repo_id, continuous=True):
    with RCount('sync_repo'):
        repo = Repository.query.get(repo_id)
        if not repo:
            logger.error('Repository %s not found', repo_id)
            return

        vcs = repo.get_vcs()
        if vcs is None:
            logger.warning('Repository %s has no VCS backend set', repo.id)
            return

        if repo.status != RepositoryStatus.active:
            logger.info('Repository %s is not active', repo.id)
            return

        Repository.query.filter(
            Repository.id == repo.id,
        ).update({
            'last_update_attempt': datetime.utcnow(),
        }, synchronize_session=False)
        db.session.commit()

        if vcs.exists():
            vcs.update()
        else:
            vcs.clone()

        # TODO(dcramer): this doesn't scrape everything, and really we wouldn't
        # want to do this all in a single job, so we should split this into a
        # backfill task
        # TODO(dcramer): this doesn't collect commits in non-default branches
        might_have_more = True
        parent = None
        while might_have_more:
            might_have_more = False
            for commit in vcs.log(parent=parent):
                revision, created = commit.save(repo)
                db.session.commit()
                if not created:
                    break

                might_have_more = True
                parent = commit.id

                fire_signal.delay(
                    signal='revision.created',
                    kwargs={'repository_id': repo.id.hex,
                            'revision_sha': revision.sha},
                )

        Repository.query.filter(
            Repository.id == repo.id,
        ).update({
            'last_update': datetime.utcnow(),
        }, synchronize_session=False)
        db.session.commit()

        if continuous:
            raise sync_repo.NotFinished
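# NOTE (editor's sketch, not part of the original tasks): both sync_repo
# variants above end by raising sync_repo.NotFinished when `continuous` is
# set, which the surrounding task framework evidently treats as "not done
# yet, reschedule me". As a rough, hypothetical illustration of how a driver
# could consume that signal directly: run_sync_repo_forever and POLL_INTERVAL
# are invented names, and this assumes the task decorator exposes NotFinished
# as an attribute, as the raise sites suggest.
import time

POLL_INTERVAL = 15  # seconds between polls; hypothetical tuning knob


def run_sync_repo_forever(repo_id):
    # Keep re-running the task while it reports NotFinished; a plain
    # return (repo missing or inactive) ends the loop.
    while True:
        try:
            sync_repo(repo_id, continuous=True)
        except sync_repo.NotFinished:
            time.sleep(POLL_INTERVAL)
            continue
        break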
def sync_job(job_id):
    """
    Updates JobPhase and Job statuses based on the status of the constituent JobSteps.
    """
    job = Job.query.get(job_id)
    if not job:
        return

    if job.status == Status.finished:
        return

    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

    try:
        implementation.update(job=job)
    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.infra_failed
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    all_phases = list(job.phases)

    # propagate changes to any phases as they live outside of the
    # normal synchronization routines
    sync_job_phases(job, all_phases, implementation)

    is_finished = sync_job.verify_all_children() == Status.finished
    if any(p.status != Status.finished for p in all_phases):
        is_finished = False

    job.date_started = safe_agg(
        min, (j.date_started for j in all_phases if j.date_started))

    if is_finished:
        job.date_finished = safe_agg(
            max, (j.date_finished for j in all_phases if j.date_finished))
    else:
        job.date_finished = None

    if job.date_started and job.date_finished:
        job.duration = int((job.date_finished - job.date_started).total_seconds() * 1000)
    else:
        job.duration = None

    # if any phases are marked as failing, fail the build
    if any(j.result is Result.failed for j in all_phases):
        job.result = Result.failed
    # If any test cases were marked as failing, fail the build.
    # The exception is if the only failing test case occurred in a JobStep that
    # had an infra failure. In this case we can't trust the test case result as
    # being meaningful and so we ignore these.
    elif TestCase.query.join(JobStep, JobStep.id == TestCase.step_id).filter(
            TestCase.result == Result.failed,
            TestCase.job_id == job.id,
            JobStep.result != Result.infra_failed).first():
        job.result = Result.failed
    # if we've finished all phases, use the best result available
    elif is_finished:
        # Sets the final job result.
        implementation.validate(job=job)
    else:
        job.result = Result.unknown

    if is_finished:
        job.status = Status.finished
    else:
        # ensure we don't set the status to finished unless it actually is
        new_status = aggregate_status((j.status for j in all_phases))
        if new_status != Status.finished:
            job.status = new_status
        elif job.status == Status.finished:
            job.status = Status.in_progress
            current_app.logger.exception('Job incorrectly marked as finished: %s', job.id)

    if db.session.is_modified(job):
        job.date_modified = datetime.utcnow()
        db.session.add(job)
        db.session.commit()

    if not is_finished:
        raise sync_job.NotFinished

    try:
        aggregate_job_stat(job, 'test_count')
        aggregate_job_stat(job, 'test_duration')
        aggregate_job_stat(job, 'test_failures')
        aggregate_job_stat(job, 'test_rerun_count')
        aggregate_job_stat(job, 'tests_missing')
        aggregate_job_stat(job, 'lines_covered')
        aggregate_job_stat(job, 'lines_uncovered')
        aggregate_job_stat(job, 'diff_lines_covered')
        aggregate_job_stat(job, 'diff_lines_uncovered')
    except Exception:
        current_app.logger.exception('Failed to record aggregate stats for job %s', job.id)

    fire_signal.delay(
        signal='job.finished',
        kwargs={'job_id': job.id.hex},
    )

    if jobplan:
        queue.delay('update_project_plan_stats', kwargs={
            'project_id': job.project_id.hex,
            'plan_id': jobplan.plan_id.hex,
        }, countdown=1)
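# NOTE (editor's sketch): the date rollups above lean on a safe_agg helper
# that is referenced but never defined in this section. Every call site
# passes an aggregate function, an iterable, and sometimes a default, so a
# plausible reading is the sketch below; treat it as an assumption rather
# than the project's actual implementation.
def safe_agg(func, sequence, default=None):
    # Apply an aggregate such as min or max, but tolerate an empty
    # iterable by returning `default` instead of raising ValueError.
    items = list(sequence)
    if not items:
        return default
    return func(items)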
def sync_build(build_id):
    """
    Synchronizing the build happens continuously until all jobs have reported in
    as finished or have failed/aborted.

    This task is responsible for:
    - Checking in with jobs
    - Aborting/retrying them if they're beyond limits
    - Aggregating the results from jobs into the build itself
    """
    build = Build.query.get(build_id)
    if not build:
        return

    if build.status == Status.finished:
        return

    all_jobs = list(Job.query.filter(
        Job.build_id == build_id,
    ))

    is_finished = sync_build.verify_all_children() == Status.finished
    if any(p.status != Status.finished for p in all_jobs):
        is_finished = False

    prev_started = build.date_started
    build.date_started = safe_agg(
        min, (j.date_started for j in all_jobs if j.date_started))

    # We want to report how long we waited for the build to start once and only once,
    # so we do it at the transition from not started to started.
    if not prev_started and build.date_started:
        queued_time = build.date_started - build.date_created
        statsreporter.stats().log_timing('build_start_latency',
                                         _timedelta_to_millis(queued_time))

    if is_finished:
        # If there are no jobs (or no jobs with a finished date) fall back to
        # finishing now, since at this point, the build is done executing.
        build.date_finished = safe_agg(
            max, (j.date_finished for j in all_jobs if j.date_finished),
            datetime.utcnow())
    else:
        build.date_finished = None

    if build.date_started and build.date_finished:
        build.duration = _timedelta_to_millis(build.date_finished - build.date_started)
    else:
        build.duration = None

    if any(j.result is Result.failed for j in all_jobs):
        build.result = Result.failed
    elif is_finished:
        build.result = aggregate_result((j.result for j in all_jobs))
    else:
        build.result = Result.unknown

    if is_finished:
        build.status = Status.finished
    else:
        # ensure we don't set the status to finished unless it actually is
        new_status = aggregate_status((j.status for j in all_jobs))
        if new_status != Status.finished:
            build.status = new_status

    if is_finished:
        build.date_decided = datetime.utcnow()
        decided_latency = build.date_decided - build.date_finished
        statsreporter.stats().log_timing('build_decided_latency',
                                         _timedelta_to_millis(decided_latency))
    else:
        build.date_decided = None

    if db.session.is_modified(build):
        build.date_modified = datetime.utcnow()
        db.session.add(build)
        db.session.commit()

    if not is_finished:
        raise sync_build.NotFinished

    with statsreporter.stats().timer('build_stat_aggregation'):
        try:
            aggregate_build_stat(build, 'test_count')
            aggregate_build_stat(build, 'test_duration')
            aggregate_build_stat(build, 'test_failures')
            aggregate_build_stat(build, 'test_rerun_count')
            aggregate_build_stat(build, 'tests_missing')
            aggregate_build_stat(build, 'lines_covered')
            aggregate_build_stat(build, 'lines_uncovered')
            aggregate_build_stat(build, 'diff_lines_covered')
            aggregate_build_stat(build, 'diff_lines_uncovered')
        except Exception:
            current_app.logger.exception('Failed to record aggregate stats for build %s', build.id)

    fire_signal.delay(
        signal='build.finished',
        kwargs={'build_id': build.id.hex},
    )

    queue.delay('update_project_stats', kwargs={
        'project_id': build.project_id.hex,
    }, countdown=1)
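# NOTE (editor's sketch): this sync_build variant converts durations with
# _timedelta_to_millis, which is not shown here. Since the older variants
# compute int(delta.total_seconds() * 1000) inline, the helper is presumably
# the same one-liner; this is an inference, not the confirmed definition.
from datetime import timedelta


def _timedelta_to_millis(td):
    # type: (timedelta) -> int
    # Whole milliseconds, matching the inline math in the older versions.
    return int(td.total_seconds() * 1000)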
def sync(repo):
    """
    Checks the repository for new commits, and fires revision.created signals.
    """
    vcs = repo.get_vcs()
    if vcs is None:
        logger.warning('Repository %s has no VCS backend set', repo.id)
        return False

    if repo.status != RepositoryStatus.active:
        logger.info('Repository %s is not active', repo.id)
        return False

    Repository.query.filter(
        Repository.id == repo.id,
    ).update({
        'last_update_attempt': datetime.utcnow(),
    }, synchronize_session=False)
    db.session.commit()

    if vcs.exists():
        try:
            vcs.update()
        except ConcurrentUpdateError:
            # Another process is already updating, so there's no need to
            # update here.
            pass
    else:
        vcs.clone()

    # The loop below does two things:
    # 1) adds new revisions to the database
    # 2) fires off revision.created signals for recent revisions
    #
    # TODO(dcramer): this doesn't scrape everything, and really we wouldn't
    # want to do this all in a single job, so we should split this into a
    # backfill task
    if repo.backend == RepositoryBackend.git:
        revisions = vcs.log(parent=None, limit=NUM_RECENT_COMMITS, first_parent=False)
    else:
        revisions = vcs.log(parent=None, limit=NUM_RECENT_COMMITS)

    for commit in revisions:
        known_revision = Revision.query.filter(
            Revision.repository_id == repo.id,
            Revision.sha == commit.id
        ).with_for_update().scalar()
        if known_revision and known_revision.date_created_signal:
            db.session.commit()
            continue

        revision, created, _ = commit.save(repo)
        db.session.commit()

        # Lock the revision.
        revision = Revision.query.filter(
            Revision.repository_id == repo.id,
            Revision.sha == commit.id
        ).with_for_update().scalar()

        # Fire the signal if the revision was created or its branches were discovered.
        #
        # The `revision.branches` check is a hack right now to prevent builds from
        # triggering on branchless commits.
        if revision.branches and not revision.date_created_signal:
            revision.date_created_signal = datetime.utcnow()
            fire_signal.delay(
                signal='revision.created',
                kwargs={'repository_id': repo.id.hex, 'revision_sha': revision.sha},
            )
            db.session.commit()
        db.session.commit()

    Repository.query.filter(
        Repository.id == repo.id,
    ).update({
        'last_update': datetime.utcnow(),
    }, synchronize_session=False)
    db.session.commit()

    return True
def sync_job(job_id):
    with RCount('sync_job'):
        job = Job.query.get(job_id)
        if not job:
            return

        if job.status == Status.finished:
            return

        # TODO(dcramer): we make an assumption that there is a single step
        jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

        try:
            implementation.update(job=job)
        except UnrecoverableException:
            job.status = Status.finished
            job.result = Result.aborted
            current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

        all_phases = list(job.phases)

        # propagate changes to any phases as they live outside of the
        # normal synchronization routines
        sync_job_phases(job, all_phases)

        is_finished = sync_job.verify_all_children() == Status.finished
        if any(p.status != Status.finished for p in all_phases):
            is_finished = False

        job.date_started = safe_agg(
            min, (j.date_started for j in all_phases if j.date_started))

        if is_finished:
            job.date_finished = safe_agg(
                max, (j.date_finished for j in all_phases if j.date_finished))
        else:
            job.date_finished = None

        if job.date_started and job.date_finished:
            job.duration = int((job.date_finished - job.date_started).total_seconds() * 1000)
        else:
            job.duration = None

        # if any phases are marked as failing, fail the build
        if any(j.result is Result.failed for j in all_phases):
            job.result = Result.failed
        # if any test cases were marked as failing, fail the build
        elif TestCase.query.filter(TestCase.result == Result.failed,
                                   TestCase.job_id == job.id).first():
            job.result = Result.failed
        # if we've finished all phases, use the best result available
        elif is_finished:
            job.result = aggregate_result((j.result for j in all_phases))
        else:
            job.result = Result.unknown

        if is_finished:
            job.status = Status.finished
        else:
            # ensure we don't set the status to finished unless it actually is
            new_status = aggregate_status((j.status for j in all_phases))
            if new_status != Status.finished:
                job.status = new_status
            elif job.status == Status.finished:
                job.status = Status.in_progress
                current_app.logger.exception('Job incorrectly marked as finished: %s', job.id)

        if db.session.is_modified(job):
            job.date_modified = datetime.utcnow()
            db.session.add(job)
            db.session.commit()

        if not is_finished:
            raise sync_job.NotFinished

        try:
            aggregate_job_stat(job, 'test_count')
            aggregate_job_stat(job, 'test_duration')
            aggregate_job_stat(job, 'test_failures')
            aggregate_job_stat(job, 'test_rerun_count')
            aggregate_job_stat(job, 'tests_missing')
            aggregate_job_stat(job, 'lines_covered')
            aggregate_job_stat(job, 'lines_uncovered')
            aggregate_job_stat(job, 'diff_lines_covered')
            aggregate_job_stat(job, 'diff_lines_uncovered')
        except Exception:
            current_app.logger.exception('Failed to record aggregate stats for job %s', job.id)

        fire_signal.delay(
            signal='job.finished',
            kwargs={'job_id': job.id.hex},
        )

        if jobplan:
            queue.delay('update_project_plan_stats', kwargs={
                'project_id': job.project_id.hex,
                'plan_id': jobplan.plan_id.hex,
            }, countdown=1)
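# NOTE (editor's sketch): several variants fold phase or job outcomes with
# aggregate_result and aggregate_status, whose definitions are not included.
# Each takes an iterable of enum values and returns a single one. The
# severity ordering below is an illustrative assumption; the real precedence
# lives in the project's Result and Status enums.
_RESULT_SEVERITY = [Result.unknown, Result.passed, Result.aborted,
                    Result.infra_failed, Result.failed]


def aggregate_result(results):
    # Fold an iterable of results into the most severe one seen.
    results = list(results)
    if not results:
        return Result.unknown
    return max(results, key=_RESULT_SEVERITY.index)


def aggregate_status(statuses):
    # Report finished only when everything is finished; otherwise surface
    # the most active status (a simplifying assumption).
    statuses = list(statuses)
    if statuses and all(s == Status.finished for s in statuses):
        return Status.finished
    if any(s == Status.in_progress for s in statuses):
        return Status.in_progress
    return Status.queued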
def create_or_update_revision_result(revision_sha, project_id, propagation_limit):
    """Given a revision sha and project ID, try to update the revision result
    for it. This involves copying results for unaffected Bazel targets from
    the latest parent build.

    `propagation_limit` is used to control how many times this function will
    be called recursively on the revision's children. If it is 0, then this
    function only updates the current revision's revision result and does not
    do any recursion.
    """
    # type: (str, UUID, int) -> None
    project = Project.query.get(project_id)
    revision = Revision.query.filter(
        Revision.sha == revision_sha,
        Revision.repository_id == project.repository_id,
    ).first()
    last_finished_build = get_latest_finished_build_for_revision(
        revision_sha, project_id)
    if not last_finished_build:
        return
    unaffected_targets = BazelTarget.query.join(
        Job, BazelTarget.job_id == Job.id,
    ).filter(
        BazelTarget.result_source == ResultSource.from_parent,
        Job.build_id == last_finished_build.id,
    ).all()
    if len(unaffected_targets) > 0 and len(revision.parents) > 0:
        # TODO(naphat) there's probably a better way to select a parent,
        # but that happens rarely enough that it can be punted for now
        parent_revision_sha = revision.parents[0]
        # TODO(naphat) we should find a better way to select parent builds.
        # Even if a parent build is not finished, we can already start to
        # look at target results, as it may already have results for all of
        # our unaffected_targets. Perhaps an optimization is to take the
        # latest build instead of the latest finished build. Finished builds
        # are more likely to have the complete set of targets we need,
        # though. But if a finished build is not the latest build, then
        # maybe that finished build had an infra failure. Anyway, for
        # simplicity, let's stick to finished builds for now.
        parent_build = get_latest_finished_build_for_revision(
            parent_revision_sha, project_id)
        if parent_build:
            # group unaffected targets by job
            unaffected_targets_groups = defaultdict(lambda: {})
            for target in unaffected_targets:
                unaffected_targets_groups[target.job_id][target.name] = target
            # process targets in batches, grouped by job id; almost always
            # this is going to be a single job, since there is usually only
            # one autogenerated plan per project
            for job_id, targets_dict in unaffected_targets_groups.iteritems():
                jobplan = JobPlan.query.filter(
                    JobPlan.project_id == project_id,
                    JobPlan.build_id == last_finished_build.id,
                    JobPlan.job_id == job_id,
                ).first()
                if not jobplan:
                    continue
                parent_targets = BazelTarget.query.join(
                    Job, BazelTarget.job_id == Job.id,
                ).join(
                    JobPlan, BazelTarget.job_id == JobPlan.job_id,
                ).filter(
                    Job.build_id == parent_build.id,
                    BazelTarget.name.in_(targets_dict),
                    JobPlan.plan_id == jobplan.plan_id,
                )
                for parent_target in parent_targets:
                    targets_dict[parent_target.name].result = parent_target.result
                    db.session.add(targets_dict[parent_target.name])
        else:
            logger.info(
                "Revision %s could not find a parent build for parent revision %s.",
                revision_sha, parent_revision_sha)
    revision_result, _ = create_or_update(RevisionResult, where={
        'revision_sha': revision_sha,
        'project_id': project_id,
    }, values={
        'build_id': last_finished_build.id,
        'result': aggregate_result([last_finished_build.result] +
                                   [t.result for t in unaffected_targets]),
    })
    db.session.commit()
    fire_signal.delay(
        signal='revision_result.updated',
        kwargs={'revision_result_id': revision_result.id.hex},
    )
    if propagation_limit > 0:
        # TODO: stop the propagation if nothing changed
        for child_revision in get_child_revisions(revision):
            create_or_update_revision_result(
                child_revision.sha, project_id,
                propagation_limit=propagation_limit - 1)
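# NOTE (editor's sketch): because each recursive call decrements
# propagation_limit, the caller bounds the total fan-out explicitly. A
# hypothetical invocation (the sha, project object, and limit below are
# illustrative placeholders, not values from the original code):
create_or_update_revision_result(
    revision_sha='a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2',
    project_id=some_project.id,  # a project UUID; `some_project` is a placeholder
    propagation_limit=2,  # update this revision, its children, and grandchildren
)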
def sync_build(build_id):
    """
    Synchronizing the build happens continuously until all jobs have reported in
    as finished or have failed/aborted.

    This task is responsible for:
    - Checking in with jobs
    - Aborting/retrying them if they're beyond limits
    - Aggregating the results from jobs into the build itself
    """
    build = Build.query.get(build_id)
    if not build:
        return

    if build.status == Status.finished:
        return

    all_jobs = list(Job.query.filter(
        Job.build_id == build_id,
    ))

    is_finished = sync_build.verify_all_children() == Status.finished

    build.date_started = safe_agg(
        min, (j.date_started for j in all_jobs if j.date_started))

    if is_finished:
        build.date_finished = safe_agg(
            max, (j.date_finished for j in all_jobs if j.date_finished))
    else:
        build.date_finished = None

    if build.date_started and build.date_finished:
        build.duration = int((build.date_finished - build.date_started).total_seconds() * 1000)
    else:
        build.duration = None

    if any(j.result is Result.failed for j in all_jobs):
        build.result = Result.failed
    elif is_finished:
        build.result = safe_agg(max, (j.result for j in all_jobs))
    else:
        build.result = Result.unknown

    if is_finished:
        build.status = Status.finished
    elif any(j.status is not Status.queued for j in all_jobs):
        build.status = Status.in_progress
    else:
        build.status = Status.queued

    if db.session.is_modified(build):
        build.date_modified = datetime.utcnow()
        db.session.add(build)
        db.session.commit()

    if not is_finished:
        raise sync_build.NotFinished

    try:
        aggregate_build_stat(build, 'test_count')
        aggregate_build_stat(build, 'test_duration')
        aggregate_build_stat(build, 'test_failures')
        aggregate_build_stat(build, 'test_rerun_count')
        aggregate_build_stat(build, 'tests_missing')
        aggregate_build_stat(build, 'lines_covered')
        aggregate_build_stat(build, 'lines_uncovered')
        aggregate_build_stat(build, 'diff_lines_covered')
        aggregate_build_stat(build, 'diff_lines_uncovered')
    except Exception:
        current_app.logger.exception('Failed to record aggregate stats for build %s', build.id)

    fire_signal.delay(
        signal='build.finished',
        kwargs={'build_id': build.id.hex},
    )

    queue.delay('update_project_stats', kwargs={
        'project_id': build.project_id.hex,
    }, countdown=1)
def sync_job(job_id):
    job = Job.query.get(job_id)
    if not job:
        return

    if job.status == Status.finished:
        return

    # TODO(dcramer): we make an assumption that there is a single step
    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

    try:
        implementation.update(job=job)
    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.aborted
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    is_finished = sync_job.verify_all_children() == Status.finished
    if is_finished:
        job.status = Status.finished
        db.session.flush()

    all_phases = list(job.phases)

    # propagate changes to any phases as they live outside of the
    # normal synchronization routines
    sync_job_phases(job, all_phases)

    job.date_started = safe_agg(
        min, (j.date_started for j in all_phases if j.date_started))

    if is_finished:
        job.date_finished = safe_agg(
            max, (j.date_finished for j in all_phases if j.date_finished))
    else:
        job.date_finished = None

    if job.date_started and job.date_finished:
        job.duration = int((job.date_finished - job.date_started).total_seconds() * 1000)
    else:
        job.duration = None

    # if any phases are marked as failing, fail the build
    if any(j.result is Result.failed for j in all_phases):
        job.result = Result.failed
    # if any test cases were marked as failing, fail the build
    elif TestCase.query.filter(TestCase.result == Result.failed,
                               TestCase.job_id == job.id).first():
        job.result = Result.failed
    # if we've finished all phases, use the best result available
    elif is_finished:
        job.result = safe_agg(max, (j.result for j in all_phases))
    else:
        job.result = Result.unknown

    if is_finished:
        job.status = Status.finished
    elif any(j.status is not Status.queued for j in all_phases):
        job.status = Status.in_progress
    else:
        job.status = Status.queued

    if db.session.is_modified(job):
        job.date_modified = datetime.utcnow()
        db.session.add(job)
        db.session.commit()

    if not is_finished:
        raise sync_job.NotFinished

    try:
        aggregate_job_stat(job, 'test_count')
        aggregate_job_stat(job, 'test_duration')
        aggregate_job_stat(job, 'test_failures')
        aggregate_job_stat(job, 'test_rerun_count')
        aggregate_job_stat(job, 'tests_missing')
        aggregate_job_stat(job, 'lines_covered')
        aggregate_job_stat(job, 'lines_uncovered')
        aggregate_job_stat(job, 'diff_lines_covered')
        aggregate_job_stat(job, 'diff_lines_uncovered')
    except Exception:
        current_app.logger.exception('Failed to record aggregate stats for job %s', job.id)

    fire_signal.delay(
        signal='job.finished',
        kwargs={'job_id': job.id.hex},
    )

    if jobplan:
        queue.delay('update_project_plan_stats', kwargs={
            'project_id': job.project_id.hex,
            'plan_id': jobplan.plan_id.hex,
        }, countdown=1)
def sync_job(job_id):
    """
    Updates JobPhase and Job statuses based on the status of the constituent JobSteps.
    """
    job = Job.query.get(job_id)
    if not job:
        return

    if job.status == Status.finished:
        return

    # TODO(dcramer): we make an assumption that there is a single step
    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

    try:
        implementation.update(job=job)
    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.infra_failed
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    all_phases = list(job.phases)

    # propagate changes to any phases as they live outside of the
    # normal synchronization routines
    sync_job_phases(job, all_phases, implementation)

    is_finished = sync_job.verify_all_children() == Status.finished
    if any(p.status != Status.finished for p in all_phases):
        is_finished = False

    job.date_started = safe_agg(
        min, (j.date_started for j in all_phases if j.date_started))

    if is_finished:
        job.date_finished = safe_agg(
            max, (j.date_finished for j in all_phases if j.date_finished))
    else:
        job.date_finished = None

    if job.date_started and job.date_finished:
        job.duration = int((job.date_finished - job.date_started).total_seconds() * 1000)
    else:
        job.duration = None

    # if any phases are marked as failing, fail the build
    if any(j.result is Result.failed for j in all_phases):
        job.result = Result.failed
    # If any test cases were marked as failing, fail the build.
    # The exception is if the only failing test case occurred in a JobStep that
    # had an infra failure. In this case we can't trust the test case result as
    # being meaningful and so we ignore these.
    elif TestCase.query.join(JobStep, JobStep.id == TestCase.step_id).filter(
            TestCase.result == Result.failed,
            TestCase.job_id == job.id,
            JobStep.result != Result.infra_failed).first():
        job.result = Result.failed
    # if we've finished all phases, use the best result available
    elif is_finished:
        # Sets the final job result.
        implementation.validate(job=job)
    else:
        job.result = Result.unknown

    if is_finished:
        job.status = Status.finished
    else:
        # ensure we don't set the status to finished unless it actually is
        new_status = aggregate_status((j.status for j in all_phases))
        if new_status != Status.finished:
            job.status = new_status
        elif job.status == Status.finished:
            job.status = Status.in_progress
            current_app.logger.exception('Job incorrectly marked as finished: %s', job.id)

    if db.session.is_modified(job):
        job.date_modified = datetime.utcnow()
        db.session.add(job)
        db.session.commit()

    if not is_finished:
        raise sync_job.NotFinished

    try:
        aggregate_job_stat(job, 'test_count')
        aggregate_job_stat(job, 'test_duration')
        aggregate_job_stat(job, 'test_failures')
        aggregate_job_stat(job, 'test_rerun_count')
        aggregate_job_stat(job, 'tests_missing')
        aggregate_job_stat(job, 'lines_covered')
        aggregate_job_stat(job, 'lines_uncovered')
        aggregate_job_stat(job, 'diff_lines_covered')
        aggregate_job_stat(job, 'diff_lines_uncovered')
    except Exception:
        current_app.logger.exception('Failed to record aggregate stats for job %s', job.id)

    fire_signal.delay(
        signal='job.finished',
        kwargs={'job_id': job.id.hex},
    )

    if jobplan:
        queue.delay('update_project_plan_stats', kwargs={
            'project_id': job.project_id.hex,
            'plan_id': jobplan.plan_id.hex,
        }, countdown=1)
def sync_build(build_id):
    """
    Synchronizing the build happens continuously until all jobs have reported in
    as finished or have failed/aborted.

    This task is responsible for:
    - Checking in with jobs
    - Aborting/retrying them if they're beyond limits
    - Aggregating the results from jobs into the build itself
    """
    build = Build.query.get(build_id)
    if not build:
        return

    if build.status == Status.finished:
        return

    all_jobs = list(Job.query.filter(
        Job.build_id == build_id,
    ))

    is_finished = sync_build.verify_all_children() == Status.finished
    if any(p.status != Status.finished for p in all_jobs):
        is_finished = False

    build.date_started = safe_agg(
        min, (j.date_started for j in all_jobs if j.date_started))

    if is_finished:
        build.date_finished = safe_agg(
            max, (j.date_finished for j in all_jobs if j.date_finished))
    else:
        build.date_finished = None

    if build.date_started and build.date_finished:
        build.duration = int((build.date_finished - build.date_started).total_seconds() * 1000)
    else:
        build.duration = None

    if any(j.result is Result.failed for j in all_jobs):
        build.result = Result.failed
    elif is_finished:
        build.result = aggregate_result((j.result for j in all_jobs))
    else:
        build.result = Result.unknown

    if is_finished:
        build.status = Status.finished
    else:
        # ensure we don't set the status to finished unless it actually is
        new_status = aggregate_status((j.status for j in all_jobs))
        if new_status != Status.finished:
            build.status = new_status

    if db.session.is_modified(build):
        build.date_modified = datetime.utcnow()
        db.session.add(build)
        db.session.commit()

    if not is_finished:
        raise sync_build.NotFinished

    try:
        aggregate_build_stat(build, 'test_count')
        aggregate_build_stat(build, 'test_duration')
        aggregate_build_stat(build, 'test_failures')
        aggregate_build_stat(build, 'test_rerun_count')
        aggregate_build_stat(build, 'tests_missing')
        aggregate_build_stat(build, 'lines_covered')
        aggregate_build_stat(build, 'lines_uncovered')
        aggregate_build_stat(build, 'diff_lines_covered')
        aggregate_build_stat(build, 'diff_lines_uncovered')
    except Exception:
        current_app.logger.exception('Failed to record aggregate stats for build %s', build.id)

    fire_signal.delay(
        signal='build.finished',
        kwargs={'build_id': build.id.hex},
    )

    queue.delay('update_project_stats', kwargs={
        'project_id': build.project_id.hex,
    }, countdown=1)
def sync_job(job_id):
    job = Job.query.get(job_id)
    if not job:
        return

    if job.status == Status.finished:
        return

    # TODO(dcramer): we make an assumption that there is a single step
    job_plan = JobPlan.query.options(
        subqueryload_all('plan.steps')
    ).filter(
        JobPlan.job_id == job.id,
    ).join(Plan).first()

    try:
        if not job_plan:
            raise UnrecoverableException('Got sync_job task without job plan: %s' % (job.id,))

        try:
            step = job_plan.plan.steps[0]
        except IndexError:
            raise UnrecoverableException('Missing steps for plan')

        implementation = step.get_implementation()
        implementation.update(job=job)
    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.aborted
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    is_finished = sync_job.verify_all_children() == Status.finished
    if is_finished:
        job.status = Status.finished

    all_phases = list(job.phases)

    job.date_started = safe_agg(
        min, (j.date_started for j in all_phases if j.date_started))

    if is_finished:
        job.date_finished = safe_agg(
            max, (j.date_finished for j in all_phases if j.date_finished))
    else:
        job.date_finished = None

    if job.date_started and job.date_finished:
        job.duration = int((job.date_finished - job.date_started).total_seconds() * 1000)
    else:
        job.duration = None

    # if any phases are marked as failing, fail the build
    if any(j.result is Result.failed for j in all_phases):
        job.result = Result.failed
    # if any test cases were marked as failing, fail the build
    elif TestCase.query.filter(TestCase.result == Result.failed,
                               TestCase.job_id == job.id).first():
        job.result = Result.failed
    # if we've finished all phases, use the best result available
    elif is_finished:
        job.result = safe_agg(
            max, (j.result for j in all_phases), Result.unknown)
    else:
        job.result = Result.unknown

    if is_finished:
        job.status = Status.finished
    elif any(j.status is not Status.queued for j in all_phases):
        job.status = Status.in_progress
    else:
        job.status = Status.queued

    if db.session.is_modified(job):
        job.date_modified = datetime.utcnow()
        db.session.add(job)
        db.session.commit()

    if not is_finished:
        raise sync_job.NotFinished

    try:
        aggregate_job_stat(job, 'test_count')
        aggregate_job_stat(job, 'test_duration')
        aggregate_job_stat(job, 'test_failures')
        aggregate_job_stat(job, 'test_rerun_count')
        aggregate_job_stat(job, 'tests_missing')
        aggregate_job_stat(job, 'lines_covered')
        aggregate_job_stat(job, 'lines_uncovered')
        aggregate_job_stat(job, 'diff_lines_covered')
        aggregate_job_stat(job, 'diff_lines_uncovered')
    except Exception:
        current_app.logger.exception('Failed to record aggregate stats for job %s', job.id)

    fire_signal.delay(
        signal='job.finished',
        kwargs={'job_id': job.id.hex},
    )

    if job_plan:
        queue.delay('update_project_plan_stats', kwargs={
            'project_id': job.project_id.hex,
            'plan_id': job_plan.plan_id.hex,
        }, countdown=1)
def sync_repo(repo_id, continuous=True):
    """
    Polls repositories for new commits, and fires signals for revisions.
    """
    repo = Repository.query.get(repo_id)
    if not repo:
        logger.error('Repository %s not found', repo_id)
        return

    vcs = repo.get_vcs()
    if vcs is None:
        logger.warning('Repository %s has no VCS backend set', repo.id)
        return

    if repo.status != RepositoryStatus.active:
        logger.info('Repository %s is not active', repo.id)
        return

    Repository.query.filter(
        Repository.id == repo.id,
    ).update({
        'last_update_attempt': datetime.utcnow(),
    }, synchronize_session=False)
    db.session.commit()

    if vcs.exists():
        vcs.update()
    else:
        vcs.clone()

    # The loop below does two things:
    # 1) adds new revisions to the database
    # 2) fires off revision.created signals for recent revisions
    #
    # TODO(dcramer): this doesn't scrape everything, and really we wouldn't
    # want to do this all in a single job, so we should split this into a
    # backfill task
    for commit in vcs.log(parent=None, limit=NUM_RECENT_COMMITS):
        known_revision = Revision.query.filter(
            Revision.repository_id == repo_id,
            Revision.sha == commit.id
        ).with_for_update().scalar()
        if known_revision and known_revision.date_created_signal:
            db.session.commit()
            continue

        revision, created, _ = commit.save(repo)
        db.session.commit()

        # Lock the revision.
        revision = Revision.query.filter(
            Revision.repository_id == repo_id,
            Revision.sha == commit.id
        ).with_for_update().scalar()

        # Fire the signal if the revision was created or its branches were discovered.
        #
        # The `revision.branches` check is a hack right now to prevent builds from
        # triggering on branchless commits.
        if revision.branches and not revision.date_created_signal:
            revision.date_created_signal = datetime.utcnow()
            fire_signal.delay(
                signal='revision.created',
                kwargs={'repository_id': repo.id.hex, 'revision_sha': revision.sha},
            )
            db.session.commit()
        db.session.commit()

    Repository.query.filter(
        Repository.id == repo.id,
    ).update({
        'last_update': datetime.utcnow(),
    }, synchronize_session=False)
    db.session.commit()

    if continuous:
        raise sync_repo.NotFinished