def aggregate_build_stats_for_job(job_id: UUID):
    """
    Given a job, aggregate its data upwards into the Build.

    This should generally be fired upon a job's completion, or alternatively
    it can be used to repair aggregate data.
    """
    lock_key = "job:{job_id}".format(job_id=job_id)
    with redis.lock(lock_key):
        job = (
            Job.query.unrestricted_unsafe()
            .with_for_update(nowait=True)
            .filter(Job.id == job_id)
            .first()
        )
        if not job:
            raise ValueError("Unable to find job with id = {}".format(job_id))

        auth.set_current_tenant(
            auth.RepositoryTenant(repository_id=job.repository_id))

        # we need to handle the race between when the mutations were made to <Job>
        # and when the only remaining artifact may have finished processing
        if job.status == Status.collecting_results:
            if not has_unprocessed_artifacts(job):
                job.status = Status.finished
                if not job.date_finished:
                    job.date_finished = timezone.now()
                db.session.add(job)
                db.session.commit()
            else:
                pending_artifact_ids = db.session.query(PendingArtifact.id).filter(
                    PendingArtifact.repository_id == job.repository_id,
                    PendingArtifact.provider == job.provider,
                    PendingArtifact.external_build_id == job.build.external_id,
                    PendingArtifact.external_job_id == job.external_id,
                )
                # the query yields single-column rows, so unpack each tuple
                for (pa_id,) in pending_artifact_ids:
                    process_pending_artifact.delay(pending_artifact_id=pa_id)

        # record any job-specific stats that might not have been taken care of
        # elsewhere
        if job.status == Status.finished:
            record_test_stats(job.id)
            record_style_violation_stats(job.id)
            record_bundle_stats(job.id)
            record_failure_reasons(job)

        db.session.commit()

    lock_key = "aggstatsbuild:{build_id}".format(build_id=job.build_id.hex)
    with redis.lock(lock_key):
        aggregate_build_stats.delay(build_id=job.build_id)
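# The status transition above hinges on has_unprocessed_artifacts. A minimal
# sketch of that helper, assuming an Artifact model with job_id and status
# columns and zeus-style import paths; the real implementation (and whether
# it takes a Job object or a job id -- both appear at call sites here) may differ.
from zeus.config import db          # assumed import path
from zeus.constants import Status   # assumed import path
from zeus.models import Artifact    # assumed import path


def has_unprocessed_artifacts(job_id) -> bool:
    # EXISTS is enough here: we only care whether at least one artifact
    # for this job has not finished processing yet
    return db.session.query(
        Artifact.query.filter(
            Artifact.job_id == job_id,
            Artifact.status != Status.finished,
        ).exists()
    ).scalar()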
def upsert_build(hook: Hook, external_id: str, data: dict = None) -> Response:
    provider_name = hook.get_provider().get_name(hook.config)
    lock_key = "hook:build:{repo_id}:{provider}:{build_xid}".format(
        repo_id=hook.repository_id, provider=provider_name, build_xid=external_id
    )
    # TODO (here and in other upsert_* functions): it's better to move all the
    # locking code to async tasks.
    with redis.lock(lock_key, timeout=BUILD_LOCK_TIMEOUT, expire=30):
        json = data.copy() if data else {}
        json["external_id"] = external_id
        json["provider"] = provider_name
        json["hook_id"] = str(hook.id)

        build = Build.query.filter(
            Build.provider == provider_name, Build.external_id == external_id
        ).first()
        if build:
            return client.put(
                "/repos/{}/builds/{}".format(
                    hook.repository.get_full_name(), build.number
                ),
                json=json,
            )

        return client.post(
            "/repos/{}/builds".format(hook.repository.get_full_name()), json=json
        )
def identify_revision(repository: Repository, treeish: str):
    """
    Attempt to transform a commit-like reference into a valid revision.
    """
    # try to find it in the database first
    if len(treeish) == 40:
        revision = Revision.query.filter(
            Revision.repository_id == repository.id, Revision.sha == treeish
        ).first()
        if revision:
            return revision

    try:
        vcs = repository.get_vcs()
    except UnknownRepositoryBackend:
        return None

    vcs.ensure(update_if_exists=False)

    lock_key = "sync_repo:{repo_id}".format(repo_id=repository.id)
    # lock this update to avoid piling up duplicate fetch/save calls
    with redis.lock(lock_key, expire=30):
        try:
            commit = next(vcs.log(parent=treeish, limit=1))
        except UnknownRevision:
            # we may not have fetched the commit yet; update and retry once
            vcs.update()
            commit = next(vcs.log(parent=treeish, limit=1))

        revision, _ = commit.save(repository)

    return revision
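# A hedged usage sketch for identify_revision; the repository lookup and ref
# values below are illustrative only.
repo = Repository.query.filter(
    Repository.owner_name == "getsentry", Repository.name == "zeus"
).first()

# a full 40-character SHA is answered straight from the database when present
revision = identify_revision(repo, "8a0c" + "0" * 36)

# a branch name falls through to vcs.log(); on UnknownRevision the clone is
# updated once and the lookup retried before the error propagates
revision = identify_revision(repo, "master")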
def upsert_job(build: Build, hook: Hook, external_id: str, data: dict = None) -> Response:
    provider_name = hook.get_provider().get_name(hook.config)
    lock_key = "upsert:job:{build_id}:{provider}:{job_xid}".format(
        build_id=build.id, provider=provider_name, job_xid=external_id
    )
    with redis.lock(lock_key):
        json = data.copy() if data else {}
        json["external_id"] = external_id
        json["provider"] = provider_name
        json["hook_id"] = str(hook.id)

        job = Job.query.filter(
            Job.provider == provider_name,
            Job.external_id == external_id,
            Job.build_id == build.id,
        ).first()
        if job:
            return client.put(
                "/repos/{}/builds/{}/jobs/{}".format(
                    build.repository.get_full_name(), job.build.number, job.number
                ),
                json=json,
            )

        return client.post(
            "/repos/{}/builds/{}/jobs".format(
                build.repository.get_full_name(), build.number
            ),
            json=json,
        )
def upsert_job(build: Build, provider: str, external_id: str, data: dict = None) -> Response:
    lock_key = 'upsert:job:{build_id}:{provider}:{job_xid}'.format(
        build_id=build.id,
        provider=provider,
        job_xid=external_id,
    )
    with redis.lock(lock_key):
        json = data.copy() if data else {}
        json['external_id'] = external_id
        json['provider'] = provider

        job = Job.query.filter(
            Job.provider == provider,
            Job.external_id == external_id,
            Job.build_id == build.id,
        ).first()
        if job:
            return client.put(
                '/repos/{}/builds/{}/jobs/{}'.format(
                    build.repository.get_full_name(),
                    job.build.number,
                    job.number,
                ),
                json=json)

        return client.post(
            '/repos/{}/builds/{}/jobs'.format(
                build.repository.get_full_name(),
                build.number,
            ),
            json=json)
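# Both upsert_job variants share the same idempotency contract; a hedged usage
# sketch with illustrative provider/external-id values. The Redis lock
# serializes concurrent webhook deliveries for the same external job, so the
# second call observes the row created by the first and PUTs instead of POSTing.
provider, external_id = "travis-ci", "job-12345"  # example values

# first delivery: no matching Job row yet -> POST creates it
upsert_job(build, provider, external_id, data={"status": "in_progress"})

# duplicate / later delivery: the row now exists -> PUT updates it
upsert_job(build, provider, external_id, data={"status": "finished"})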
def aggregate_build_stats_for_job(job_id: UUID):
    """
    Given a job, aggregate its data upwards into the Build.

    This should generally be fired upon a job's completion, or alternatively
    it can be used to repair aggregate data.
    """
    job = Job.query.unrestricted_unsafe().options(
        joinedload('project'),
    ).filter(
        Job.id == job_id,
    ).first()
    if not job:
        raise ValueError('Unable to find job with id = {}'.format(job_id))

    auth.set_current_tenant(
        auth.Tenant(
            organization_ids=[job.organization_id],
            project_ids=[job.project_id],
            repository_ids=[job.project.repository_id],
        ))

    # record any job-specific stats that might not have been taken care of
    # elsewhere (we might want to move TestResult's stats here as well, or
    # move coverage's stats elsewhere)
    record_coverage_stats(job)

    lock_key = 'aggstatsbuild:{build_id}'.format(build_id=job.build_id.hex)
    with redis.lock(lock_key):
        aggregate_build_stats(job.build_id)
def upsert_change_request(repository: Repository, provider: str,
                          external_id: str, data: dict = None) -> Response:
    lock_key = "hook:cr:{repo_id}:{provider}:{cr_xid}".format(
        repo_id=repository.id, provider=provider, cr_xid=external_id
    )
    with redis.lock(lock_key):
        json = data.copy() if data else {}
        json["external_id"] = external_id
        json["provider"] = provider

        cr = ChangeRequest.query.filter(
            ChangeRequest.repository_id == repository.id,
            ChangeRequest.provider == provider,
            ChangeRequest.external_id == external_id,
        ).first()
        if cr:
            return client.put(
                "/repos/{}/change-requests/{}".format(
                    repository.get_full_name(), cr.number
                ),
                json=json,
            )

        return client.post(
            "/repos/{}/change-requests".format(repository.get_full_name()),
            json=json,
        )
def upsert_build(repository: Repository, provider: str, external_id: str,
                 data: dict = None) -> Response:
    lock_key = 'hook:build:{repo_id}:{provider}:{build_xid}'.format(
        repo_id=repository.id,
        provider=provider,
        build_xid=external_id,
    )
    with redis.lock(lock_key):
        json = data.copy() if data else {}
        json['external_id'] = external_id
        json['provider'] = provider

        build = Build.query.filter(
            Build.provider == provider,
            Build.external_id == external_id,
        ).first()
        if build:
            return client.put(
                '/repos/{}/builds/{}'.format(
                    repository.get_full_name(),
                    build.number,
                ),
                json=json)

        return client.post(
            '/repos/{}/builds'.format(repository.get_full_name()),
            json=json)
def resolve_ref_for_change_request(change_request_id: UUID):
    lock_key = f"resolve-cr-ref:{change_request_id}"
    with redis.lock(lock_key, timeout=60.0, nowait=True):
        cr = ChangeRequest.query.unrestricted_unsafe().get(change_request_id)
        if not cr:
            raise ValueError(
                "Unable to find change request with id = {}".format(
                    change_request_id))

        auth.set_current_tenant(
            auth.RepositoryTenant(repository_id=cr.repository_id))

        if not cr.parent_revision_sha and cr.parent_ref:
            try:
                revision = revisions.identify_revision(
                    cr.repository, cr.parent_ref, with_vcs=True)
            except InvalidPublicKey:
                raise
            cr.parent_revision_sha = revision.sha
            db.session.add(cr)
            db.session.commit()

        if not cr.head_revision_sha and cr.head_ref:
            revision = revisions.identify_revision(
                cr.repository, cr.head_ref, with_vcs=True)
            cr.head_revision_sha = revision.sha
            if not cr.authors and revision.authors:
                cr.authors = revision.authors
            db.session.add(cr)
            db.session.commit()
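# redis.lock here is the project's own helper rather than redis-py's Lock
# class. A minimal sketch of a compatible context manager built on SET NX EX;
# the UnableToGetLock name and the exact keyword semantics (timeout as the
# acquisition wait, expire as the key TTL, nowait as fail-fast) are
# assumptions inferred from the call sites in this file.
import time
import uuid
from contextlib import contextmanager

import redis as redis_py  # pip install redis


class UnableToGetLock(Exception):
    pass


@contextmanager
def lock(client: redis_py.Redis, key: str, timeout: float = 3.0,
         expire: int = 30, nowait: bool = False):
    token = uuid.uuid4().hex
    deadline = time.monotonic() + timeout
    # SET key token NX EX expire: succeeds only if nobody holds the lock
    while not client.set(key, token, nx=True, ex=expire):
        if nowait or time.monotonic() >= deadline:
            raise UnableToGetLock("could not acquire lock on {}".format(key))
        time.sleep(0.1)
    try:
        yield
    finally:
        # best-effort release: only delete the key if we still own it
        # (it may have expired and been re-acquired by another worker)
        if client.get(key) == token.encode():
            client.delete(key)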
def upsert_build(hook: Hook, external_id: str, data: dict = None) -> Response:
    provider_name = hook.get_provider().get_name(hook.config)
    lock_key = "hook:build:{repo_id}:{provider}:{build_xid}".format(
        repo_id=hook.repository_id, provider=provider_name, build_xid=external_id
    )
    with redis.lock(lock_key):
        json = data.copy() if data else {}
        json["external_id"] = external_id
        json["provider"] = provider_name
        json["hook_id"] = str(hook.id)

        build = Build.query.filter(
            Build.provider == provider_name, Build.external_id == external_id
        ).first()
        if build:
            return client.put(
                "/repos/{}/builds/{}".format(
                    hook.repository.get_full_name(), build.number
                ),
                json=json,
            )

        return client.post(
            "/repos/{}/builds".format(hook.repository.get_full_name()), json=json
        )
def aggregate_build_stats_for_job(job_id: UUID):
    """
    Given a job, aggregate its data upwards into the Build.

    This should generally be fired upon a job's completion, or alternatively
    it can be used to repair aggregate data.
    """
    lock_key = 'job:{job_id}'.format(job_id=job_id)
    with redis.lock(lock_key):
        job = Job.query.unrestricted_unsafe().with_for_update(
            nowait=True
        ).filter(
            Job.id == job_id,
        ).first()
        if not job:
            raise ValueError('Unable to find job with id = {}'.format(job_id))

        auth.set_current_tenant(
            auth.Tenant(repository_ids=[job.repository_id]))

        # we need to handle the race between when the mutations were made to <Job>
        # and when the only remaining artifact may have finished processing
        if job.status == Status.collecting_results and not has_unprocessed_artifacts(
                job.id):
            job.status = Status.finished
            if not job.date_finished:
                job.date_finished = timezone.now()
            db.session.add(job)
            db.session.commit()

        # record any job-specific stats that might not have been taken care of
        # elsewhere (we might want to move TestResult's stats here as well)
        if job.status == Status.finished:
            record_test_stats(job.id)
            record_style_violation_stats(job.id)
            record_bundle_stats(job.id)
            record_failure_reasons(job)

    lock_key = 'aggstatsbuild:{build_id}'.format(build_id=job.build_id.hex)
    with redis.lock(lock_key):
        aggregate_build_stats.delay(build_id=job.build_id)
def resolve_ref_for_build(build_id: UUID):
    lock_key = f"resolve-build-ref:{build_id}"
    with redis.lock(lock_key, timeout=60.0, nowait=True):
        build = Build.query.unrestricted_unsafe().get(build_id)
        if not build:
            raise ValueError(
                "Unable to find build with id = {}".format(build_id))

        if build.revision_sha:
            return

        auth.set_current_tenant(
            auth.RepositoryTenant(repository_id=build.repository_id))

        revision: Optional[Revision] = None
        try:
            revision = revisions.identify_revision(
                build.repository, build.ref, with_vcs=True)
        except UnknownRevision:
            build.result = Result.errored
            build.status = Status.finished
            try:
                with db.session.begin_nested():
                    db.session.add(
                        FailureReason(
                            repository_id=build.repository_id,
                            build_id=build.id,
                            reason=FailureReason.Reason.unresolvable_ref,
                        ))
                    db.session.flush()
            except IntegrityError as exc:
                if "duplicate" not in str(exc):
                    raise
        except InvalidPublicKey:
            pass

        if revision:
            build.revision_sha = revision.sha
            if not build.authors and revision.authors:
                build.authors = revision.authors
            if not build.label:
                build.label = revision.message.split("\n")[0]

        db.session.add(build)
        db.session.commit()

    data = build_schema.dump(build)
    publish("builds", "build.update", data)
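# The FailureReason write above, and the coverage and repository-activation
# code below, all use the same insert-or-ignore idiom: flush inside a
# savepoint so that a unique-constraint violation aborts only the nested
# transaction, leaving the outer session usable. A distilled sketch
# (try_insert is a hypothetical name, not a helper from the codebase; `db`
# is the shared SQLAlchemy handle used throughout this module):
from sqlalchemy.exc import IntegrityError


def try_insert(instance) -> bool:
    """Return False if a unique constraint fired, i.e. the row already exists."""
    try:
        with db.session.begin_nested():  # SAVEPOINT
            db.session.add(instance)
            db.session.flush()  # force the INSERT so conflicts surface here
        return True
    except IntegrityError:
        # only the savepoint rolled back; the outer transaction survives
        return False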
def process(self, fp):
    results = self.get_coverage(fp)
    for result in results:
        try:
            with db.session.begin_nested():
                db.session.add(result)
        except IntegrityError:
            lock_key = "coverage:{build_id}:{file_hash}".format(
                build_id=result.build_id.hex,
                file_hash=sha1(result.filename.encode("utf-8")).hexdigest(),
            )
            with redis.lock(lock_key):
                result = self.merge_coverage(result)
                db.session.add(result)
        db.session.flush()
    return results
def process(self, fp):
    results = self.get_coverage(fp)
    for result in results:
        try:
            with db.session.begin_nested():
                db.session.add(result)
        except IntegrityError:
            lock_key = 'coverage:{job_id}:{file_hash}'.format(
                job_id=result.job_id.hex,
                file_hash=sha1(result.filename.encode('utf-8')).hexdigest(),
            )
            with redis.lock(lock_key):
                result = self.merge_coverage(result)
                db.session.add(result)
        db.session.commit()
    return results
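# On IntegrityError another worker already wrote coverage for the same file,
# so the row is merged under a per-file lock. A hedged sketch of what such a
# merge could look like, assuming per-line coverage is encoded as a string of
# flags ('C' covered, 'U' uncovered, 'N' not executable); the real model and
# precedence rules may differ.
def merge_flag_strings(a: str, b: str) -> str:
    length = max(len(a), len(b))
    # pad the shorter report: missing trailing lines count as not executable
    a, b = a.ljust(length, "N"), b.ljust(length, "N")
    rank = {"C": 2, "U": 1, "N": 0}
    # per line, keep the stronger observation: covered beats uncovered beats N
    return "".join(x if rank[x] >= rank[y] else y for x, y in zip(a, b))


assert merge_flag_strings("CUN", "UC") == "CCN"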
def post(self):
    """
    Activate a GitHub repository.
    """
    repo_name = (request.get_json() or {}).get('name')
    if not repo_name:
        return self.error('missing repo_name parameter')

    owner_name, repo_name = repo_name.split('/', 1)

    user = auth.get_current_user()

    provider = GitHubRepositoryProvider(cache=False)
    try:
        repo_data = provider.get_repo(
            user=user, owner_name=owner_name, repo_name=repo_name)
    except IdentityNeedsUpgrade as exc:
        return self.respond(
            {
                'provider': 'github',
                'error': 'identity_needs_upgrade',
                'url': exc.get_upgrade_url(),
            }, 401)

    if not repo_data['admin']:
        return self.respond(
            {
                'message': 'Insufficient permissions to activate repository',
            }, 403)

    lock_key = 'repo:{provider}/{owner_name}/{repo_name}'.format(
        provider='github',
        owner_name=owner_name,
        repo_name=repo_name,
    )
    with redis.lock(lock_key):
        try:
            with db.session.begin_nested():
                # bind various github specific attributes
                repo = Repository(
                    backend=RepositoryBackend.git,
                    provider=RepositoryProvider.github,
                    status=RepositoryStatus.active,
                    external_id=str(repo_data['id']),
                    owner_name=owner_name,
                    name=repo_name,
                    url=repo_data['url'],
                    data=repo_data['config'],
                )
                db.session.add(repo)
                db.session.flush()
        except IntegrityError:
            repo = Repository.query.unrestricted_unsafe().filter(
                Repository.provider == RepositoryProvider.github,
                Repository.external_id == str(repo_data['id']),
            ).first()
            # it's possible to get here if the "full name" already exists
            assert repo
            needs_configured = repo.status == RepositoryStatus.inactive
            if needs_configured:
                repo.status = RepositoryStatus.active
                db.session.add(repo)
        else:
            needs_configured = True

        if needs_configured:
            # generate a new private key for use on github
            key = ssh.generate_key()
            db.session.add(
                ItemOption(
                    item_id=repo.id,
                    name='auth.private-key',
                    value=key.private_key,
                ))

            # register key with github
            provider.add_key(
                user=user,
                repo_name=repo_name,
                owner_name=owner_name,
                key=key,
            )

            # we need to commit before firing off the task
            db.session.commit()

            import_repo.delay(repo_id=repo.id)

    try:
        with db.session.begin_nested():
            db.session.add(
                RepositoryAccess(
                    repository_id=repo.id,
                    user_id=user.id,
                ))
            db.session.flush()
    except IntegrityError:
        pass

    db.session.commit()

    return self.respond_with_schema(repo_schema, repo, 201)
def aggregate_build_stats(build_id: UUID):
    """
    Updates various denormalized / aggregate attributes on Build per its jobs.

    These attributes include start and completion dates, as well as the status
    and result.
    """
    # now we pull in the entirety of the build's data to aggregate state upward
    lock_key = "build:{build_id}".format(build_id=build_id)
    with redis.lock(lock_key):
        build = (
            Build.query.unrestricted_unsafe()
            .with_for_update(nowait=True)
            .get(build_id)
        )
        if not build:
            raise ValueError(
                "Unable to find build with id = {}".format(build_id))

        auth.set_current_tenant(
            auth.RepositoryTenant(repository_id=build.repository_id))

        record_coverage_stats(build.id)

        # materialize the jobs once; we iterate (and truth-test) several times
        job_list = list(Job.query.filter(Job.build_id == build.id))

        was_finished = build.status == Status.finished
        is_finished = all(p.status == Status.finished for p in job_list)

        # ensure build's dates are reflective of jobs
        build.date_started = safe_agg(
            min, (j.date_started for j in job_list if j.date_started))

        if is_finished:
            build.date_finished = safe_agg(
                max, (j.date_finished for j in job_list if j.date_finished))
        else:
            build.date_finished = None

        # if there's any failure, the build failed
        if any(j.result is Result.failed for j in job_list
               if not j.allow_failure):
            build.result = Result.failed
        # else, if we're finished, we can aggregate from results
        elif is_finished:
            if not job_list:
                build.result = Result.errored
            elif not any(j for j in job_list if not j.allow_failure):
                build.result = Result.passed
            else:
                build.result = aggregate_result(
                    j.result for j in job_list if not j.allow_failure)
        # we should never get here as long as we've got jobs and correct data
        else:
            build.result = Result.unknown

        if is_finished:
            build.status = Status.finished
        else:
            # ensure we don't set the status to finished unless it actually is
            new_status = aggregate_status(j.status for j in job_list)
            if build.status != new_status:
                build.status = new_status

        db.session.add(build)
        db.session.commit()

    # we don't bother aggregating stats unless we're finished
    if build.status == Status.finished and not was_finished:
        for stat in AGGREGATED_BUILD_STATS:
            aggregate_stat_for_build(build, stat)
        db.session.commit()
        send_build_notifications.delay(build_id=build.id)
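# aggregate_build_stats leans on a few small helpers; hedged sketches whose
# signatures are inferred from the call sites above, not confirmed against
# the codebase. Result here is the same enum used throughout this module.
def safe_agg(func, sequence, default=None):
    # min()/max() raise on empty input; a build with no dated jobs should
    # simply yield the default (None) instead
    items = list(sequence)
    return func(items) if items else default


def aggregate_result(results):
    # pick the "worst" result among the given jobs; this severity ordering
    # is an assumption about the Result enum, not a confirmed ranking
    severity = [Result.failed, Result.errored, Result.passed]
    items = list(results)
    for level in severity:
        if any(r is level for r in items):
            return level
    return Result.unknown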
def post(self):
    """
    Activate a GitHub repository.
    """
    repo_name = (request.get_json() or {}).get("name")
    if not repo_name:
        return self.error("missing repo_name parameter")

    owner_name, repo_name = repo_name.split("/", 1)

    user = auth.get_current_user()

    provider = GitHubRepositoryProvider(cache=False)
    try:
        repo_data = provider.get_repo(
            user=user, owner_name=owner_name, repo_name=repo_name)
    except IdentityNeedsUpgrade as exc:
        return self.respond(
            {
                "provider": "github",
                "error": "identity_needs_upgrade",
                "url": exc.get_upgrade_url(),
            },
            401,
        )

    if Permission.admin not in repo_data["permission"]:
        return self.respond(
            {"message": "Insufficient permissions to activate repository"}, 403)

    lock_key = Repository.get_lock_key(
        RepositoryProvider.github, owner_name, repo_name)
    with redis.lock(lock_key):
        try:
            with db.session.begin_nested():
                # bind various github specific attributes
                repo = Repository(
                    backend=RepositoryBackend.git,
                    provider=RepositoryProvider.github,
                    status=RepositoryStatus.active,
                    external_id=str(repo_data["id"]),
                    owner_name=owner_name,
                    name=repo_name,
                    url=repo_data["url"],
                    data=repo_data["config"],
                )
                db.session.add(repo)
                db.session.flush()
        except IntegrityError:
            repo = (
                Repository.query.unrestricted_unsafe()
                .filter(
                    Repository.provider == RepositoryProvider.github,
                    Repository.external_id == str(repo_data["id"]),
                )
                .first()
            )
            # it's possible to get here if the "full name" already exists
            assert repo
            needs_configured = repo.status == RepositoryStatus.inactive
            if needs_configured:
                repo.status = RepositoryStatus.active
                db.session.add(repo)
        else:
            needs_configured = True

        if needs_configured:
            # generate a new private key for use on github
            key = ssh.generate_key()
            db.session.add(
                ItemOption(
                    item_id=repo.id,
                    name="auth.private-key",
                    value=key.private_key,
                ))

            # register key with github
            # TODO(dcramer): we should store this key reference so we can delete it
            # when the user deactivates the repo
            provider.add_key(
                user=user, repo_name=repo_name, owner_name=owner_name, key=key)

            db.session.commit()

    try:
        with db.session.begin_nested():
            db.session.add(
                RepositoryAccess(
                    repository_id=repo.id,
                    user_id=user.id,
                    permission=repo_data["permission"],
                ))
            db.session.flush()
    except IntegrityError:
        pass

    db.session.commit()

    return self.respond_with_schema(repo_schema, repo, 201)
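# This revision swaps the inline lock key from the earlier version of this
# endpoint for Repository.get_lock_key. A standalone sketch consistent with
# the 'repo:{provider}/{owner_name}/{repo_name}' format that version built by
# hand; the actual classmethod body is an assumption.
def get_repo_lock_key(provider, owner_name: str, repo_name: str) -> str:
    provider_name = getattr(provider, "name", provider)  # accept enum or str
    return "repo:{}/{}/{}".format(provider_name, owner_name, repo_name)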