def test_ingest_running_to_complete_job(result_set_stored,
                                        failure_classifications,
                                        mock_buildapi_running_url,
                                        mock_buildapi_builds4h_url,
                                        mock_log_parser):
    """
    a new buildapi running job transitions to a new completed job
    """
    etl_process = RunningJobsProcess()
    etl_process.run()

    assert Job.objects.count() == 1

    # the first job in the sample data should overwrite the running job
    # we just ingested, leaving us with only 32 jobs, not 33.
    etl_process = Builds4hJobsProcess()
    etl_process.run()

    assert Job.objects.count() == 32

    # all jobs should be completed, including the original one which
    # transitioned from running.
    for job in Job.objects.all():
        assert job.state == 'completed'

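# These tests rely on a ``mock_buildapi_running_url`` fixture to stub out the
# BuildAPI endpoint. The fixture itself is not shown in this section; the
# sketch below is a hypothetical minimal version, assuming the ``responses``
# library (used elsewhere in this suite via ``activate_responses``) and a
# sample-data file whose path and the endpoint URL are assumptions for
# illustration, not the suite's actual values.
import os

import pytest
import responses


@pytest.fixture
def mock_buildapi_running_url(activate_responses):
    # Path to a checked-in sample payload; the exact location is an
    # assumption, not the real fixture's path.
    sample_path = os.path.join(os.path.dirname(__file__), 'sample_data',
                               'builds-running.js')
    with open(sample_path) as f:
        body = f.read()
    # Register the stubbed BuildAPI "running" endpoint so that
    # RunningJobsProcess().run() fetches the canned payload instead of
    # hitting the network.
    responses.add(
        responses.GET,
        'https://secure.pub.build.mozilla.org/builddata/buildjson/builds-running.js',
        body=body, status=200, content_type='application/json')
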
def test_ingest_running_to_complete_job(jm,
                                        mock_buildapi_running_url,
                                        mock_buildapi_builds4h_url,
                                        mock_log_parser,
                                        mock_get_resultset):
    """
    a new buildapi running job transitions to a new completed job
    """
    etl_process = RunningJobsProcess()
    etl_process.run()

    stored_running = jm.get_dhub().execute(proc="jobs_test.selects.jobs")

    assert len(stored_running) == 1

    # the first job in the sample data should overwrite the running job
    # we just ingested, leaving us with only 32 jobs, not 33.
    etl_process = Builds4hJobsProcess()
    etl_process.run()

    stored_obj = jm.get_dhub().execute(proc="jobs_test.selects.jobs")

    assert len(stored_obj) == 32

    # all jobs should be completed, including the original one which
    # transitioned from running.
    for job in stored_obj:
        assert job['state'] == 'completed'

def test_ingest_running_job_fields(result_set_stored,
                                   failure_classifications,
                                   mock_buildapi_running_url,
                                   mock_log_parser):
    """
    a new buildapi running job creates a new obj in the job table
    """
    etl_process = RunningJobsProcess()
    etl_process.run()

    assert Job.objects.count() == 1
    assert time.mktime(Job.objects.all()[0].start_time.timetuple()) > 0

def test_ingest_running_job_fields(jm,
                                   mock_buildapi_running_url,
                                   mock_log_parser,
                                   mock_get_resultset):
    """
    a new buildapi running job creates a new obj in the job table
    """
    etl_process = RunningJobsProcess()
    etl_process.run()

    stored_obj = jm.get_dhub().execute(proc="jobs_test.selects.jobs")

    assert len(stored_obj) == 1
    # use != rather than `is not`, which tests identity, not equality
    assert stored_obj[0]["start_timestamp"] != 0

def test_ingest_running_jobs(result_set_stored,
                             failure_classifications,
                             mock_buildapi_running_url,
                             mock_log_parser):
    """
    a new buildapi running job creates a new obj in the job table
    """
    etl_process = RunningJobsProcess()
    new_jobs_were_added = etl_process.run()
    assert new_jobs_were_added is True
    assert cache.get(CACHE_KEYS['running']) == {24767134}

    new_jobs_were_added = etl_process.run()
    assert new_jobs_were_added is False

    assert Job.objects.count() == 1

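# The ``new_jobs_were_added`` flag asserted above works because the ETL
# process remembers which job ids it has already ingested. A minimal sketch
# of that guard, assuming Django's cache API and the CACHE_KEYS dict used in
# these tests; the helper name ``_filter_new_job_ids`` is invented for
# illustration and is not the ETL code's actual function.
from django.core.cache import cache


def _filter_new_job_ids(cache_key, job_ids):
    # Compare the incoming ids against the set recorded on the previous
    # run; only ids absent from the cached set count as "new".
    seen = cache.get(cache_key) or set()
    new_ids = set(job_ids) - seen
    # Remember everything seen so far so the next run can skip it, which
    # is why the second etl_process.run() above returns False.
    cache.set(cache_key, seen | new_ids)
    return new_ids
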
def test_ingest_running_to_complete_job(jm, initial_data,
                                        mock_buildapi_running_url,
                                        mock_buildapi_builds4h_url,
                                        mock_post_json_data,
                                        mock_log_parser,
                                        mock_get_resultset,
                                        mock_get_remote_content):
    """
    a new buildapi running job transitions to a new completed job

    Also ensure that a running job does NOT go through the objectstore.
    """
    from treeherder.etl.buildapi import RunningJobsProcess
    from treeherder.etl.buildapi import Builds4hJobsProcess

    etl_process = RunningJobsProcess()
    etl_process.run()

    stored_running = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.jobs")
    stored_objectstore = jm.get_os_dhub().execute(
        proc="objectstore_test.selects.all")

    # ensure running jobs do not go to the objectstore, but go directly
    # to the jobs table without needing process_objects
    assert len(stored_objectstore) == 0
    assert len(stored_running) == 1

    # the first job in the sample data should overwrite the running job
    # we just ingested, leaving us with only 20 jobs, not 21.
    etl_process = Builds4hJobsProcess()
    etl_process.run()

    jm.process_objects(20)

    stored_obj = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.jobs")

    jm.disconnect()

    assert len(stored_obj) == 20

    # all jobs should be completed, including the original one which
    # transitioned from running.
    for job in stored_obj:
        assert job['state'] == 'completed'

def test_ingest_running_jobs_1_missing_resultset(
        jm, initial_data, sample_resultset, test_repository,
        mock_buildapi_running_missing1_url,
        mock_post_json, mock_get_resultset,
        mock_fetch_json, activate_responses):
    """
    Ensure the running job with the missing resultset is queued for
    refetching
    """
    etl_process = RunningJobsProcess()
    _do_missing_resultset_test(jm, etl_process)

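# ``_do_missing_resultset_test`` is a shared helper defined elsewhere in this
# module and not shown here. Based on the docstring above, a plausible sketch
# of what it checks; the cache key name and the failure message are
# assumptions for illustration, not the helper's real implementation.
from django.core.cache import cache


def _do_missing_resultset_test(jm, etl_process):
    new_jobs_were_added = etl_process.run()
    assert new_jobs_were_added is True
    # the job whose resultset was missing must not be silently dropped:
    # its revision should be queued so the pushlog can be re-fetched.
    missing = cache.get('missing_resultsets')  # assumed key
    assert missing, 'expected at least one revision queued for refetching'
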
def test_ingest_running_jobs(jm, mock_buildapi_running_url,
                             mock_log_parser,
                             mock_get_resultset):
    """
    a new buildapi running job creates a new obj in the job table
    """
    etl_process = RunningJobsProcess()
    new_jobs_were_added = etl_process.run()
    assert new_jobs_were_added is True
    assert cache.get(CACHE_KEYS['running']) == {24767134}

    new_jobs_were_added = etl_process.run()
    assert new_jobs_were_added is False

    stored_obj = jm.get_dhub().execute(proc="jobs_test.selects.jobs")
    assert len(stored_obj) == 1

def test_ingest_running_jobs(jm, initial_data,
                             mock_buildapi_running_url,
                             mock_post_json,
                             mock_log_parser,
                             mock_get_resultset,
                             mock_fetch_json):
    """
    a new buildapi running job creates a new obj in the job table
    """
    etl_process = RunningJobsProcess()
    new_jobs_were_added = etl_process.run()
    assert new_jobs_were_added is True
    assert cache.get(CACHE_KEYS['running']) == {24767134}

    new_jobs_were_added = etl_process.run()
    assert new_jobs_were_added is False

    stored_obj = jm.get_dhub().execute(proc="jobs_test.selects.jobs")
    jm.disconnect()
    assert len(stored_obj) == 1

def test_ingest_running_jobs(jm, initial_data,
                             mock_buildapi_running_url,
                             mock_post_json_data,
                             mock_log_parser,
                             mock_get_resultset,
                             mock_get_remote_content):
    """
    a new buildapi running job creates a new obj in the job table
    """
    from treeherder.etl.buildapi import RunningJobsProcess

    etl_process = RunningJobsProcess()
    etl_process.run()

    stored_obj = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.jobs")

    jm.disconnect()

    assert len(stored_obj) == 1

def test_ingest_running_jobs_1_missing_resultset(
        jm, initial_data, sample_resultset, test_repository,
        mock_buildapi_running_missing1_url,
        mock_post_json_data, mock_get_resultset,
        mock_get_remote_content, activate_responses):
    """
    Ensure the running job with the missing resultset is queued for
    refetching
    """
    from treeherder.etl.buildapi import RunningJobsProcess

    etl_process = RunningJobsProcess()
    _do_missing_resultset_test(jm, etl_process)

def test_ingest_running_jobs(jm, initial_data,
                             mock_buildapi_running_url,
                             mock_post_json,
                             mock_log_parser,
                             mock_get_resultset,
                             mock_get_remote_content):
    """
    a new buildapi running job creates a new obj in the job table
    """
    etl_process = RunningJobsProcess()
    new_jobs_were_added = etl_process.run()
    assert new_jobs_were_added is True
    assert cache.get(CACHE_KEYS['running']) == {24767134}

    new_jobs_were_added = etl_process.run()
    assert new_jobs_were_added is False

    stored_obj = jm.get_dhub().execute(proc="jobs_test.selects.jobs")
    jm.disconnect()
    assert len(stored_obj) == 1

def test_ingest_running_to_complete_job(jm, result_set_stored,
                                        mock_buildapi_running_url,
                                        mock_buildapi_builds4h_url,
                                        mock_log_parser):
    """
    a new buildapi running job transitions to a new completed job
    """
    etl_process = RunningJobsProcess()
    etl_process.run()

    stored_running = jm.get_dhub().execute(proc="jobs_test.selects.jobs")

    assert len(stored_running) == 1

    # the first job in the sample data should overwrite the running job
    # we just ingested, leaving us with only 32 jobs, not 33.
    etl_process = Builds4hJobsProcess()
    etl_process.run()

    stored_obj = jm.get_dhub().execute(proc="jobs_test.selects.jobs")

    assert len(stored_obj) == 32

    # all jobs should be completed, including the original one which
    # transitioned from running.
    for job in stored_obj:
        assert job['state'] == 'completed'

def _handle(self, *args, **options):
    project = options['project']
    changeset = options['changeset']

    if not options['last_n_pushes'] and not changeset:
        raise CommandError('must specify --last-n-pushes or a positional '
                           'changeset argument')

    # get reference to repo
    repo = Repository.objects.get(name=project, active_status='active')

    if options['last_n_pushes']:
        last_push_id = last_push_id_from_server(repo)
        fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
        logger.info('last server push id: %d; fetching push %d and newer',
                    last_push_id, fetch_push_id)
    else:
        fetch_push_id = None

    # make sure all tasks are run synchronously / immediately
    settings.CELERY_ALWAYS_EAGER = True

    # get hg pushlog
    pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

    # ingest this particular revision for this project
    process = HgPushlogProcess()
    # Use the actual push SHA, in case the changeset specified was a tag
    # or branch name (eg tip). HgPushlogProcess returns the full SHA.
    push_sha = process.run(pushlog_url, project, changeset=changeset,
                           last_push_id=fetch_push_id)

    # Only perform additional processing if fetching a single changeset,
    # because we only have the sha1 of the tip-most push in
    # "last N pushes" mode and can't filter appropriately.
    if not fetch_push_id:
        group_filter = options['filter_job_group']
        Builds4hJobsProcess().run(project_filter=project,
                                  revision_filter=push_sha,
                                  job_group_filter=group_filter)
        PendingJobsProcess().run(project_filter=project,
                                 revision_filter=push_sha,
                                 job_group_filter=group_filter)
        RunningJobsProcess().run(project_filter=project,
                                 revision_filter=push_sha,
                                 job_group_filter=group_filter)

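# The options consumed above (``project``, ``changeset``, ``last_n_pushes``
# and ``filter_job_group``) would be declared on the management command's
# argument parser. A plausible sketch, assuming Django's ``add_arguments``
# hook on the command class; the help strings are invented for illustration.
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    def add_arguments(self, parser):
        parser.add_argument('project',
                            help='repository/project name to ingest into')
        # positional changeset is optional because --last-n-pushes is the
        # alternative way to select what gets ingested
        parser.add_argument('changeset', nargs='?',
                            help='changeset (or tag/branch, e.g. tip) '
                                 'to ingest')
        parser.add_argument('--last-n-pushes', type=int,
                            help='ingest the last N pushes instead of a '
                                 'single changeset')
        parser.add_argument('--filter-job-group',
                            help='only ingest jobs in this job group')
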
def _handle(self, *args, **options):
    if len(args) != 2:
        raise CommandError("Need to specify (only) branch and changeset")
    (project, changeset) = args

    # get reference to repo; materialize the match as a list so the
    # emptiness check works (a bare filter() iterator is always truthy
    # on Python 3)
    rdm = RefDataManager()
    repos = [x for x in rdm.get_all_repository_info()
             if x['name'] == project]
    if not repos:
        raise CommandError("No project found named '%s'" % project)
    repo = repos[0]

    # make sure all tasks are run synchronously / immediately
    settings.CELERY_ALWAYS_EAGER = True

    # get hg pushlog
    pushlog_url = '%s/json-pushes/?full=1' % repo['url']

    # ingest this particular revision for this project
    process = HgPushlogProcess()
    # Use the actual push SHA, in case the changeset specified was a tag
    # or branch name (eg tip). HgPushlogProcess returns the full SHA, but
    # job ingestion expects the short version, so we truncate it.
    push_sha = process.run(pushlog_url, project, changeset=changeset)[0:12]

    self._process_all_objects_for_project(project)

    Builds4hJobsProcess().run(
        filter_to_project=project,
        filter_to_revision=push_sha,
        filter_to_job_group=options['filter_job_group'])
    PendingJobsProcess().run(
        filter_to_project=project,
        filter_to_revision=push_sha,
        filter_to_job_group=options['filter_job_group'])
    RunningJobsProcess().run(
        filter_to_project=project,
        filter_to_revision=push_sha,
        filter_to_job_group=options['filter_job_group'])

    self._process_all_objects_for_project(project)

def fetch_buildapi_running():
    """
    Fetch jobs from the buildapi running-jobs API and load them.
    """
    RunningJobsProcess().run()

def fetch_buildapi_running():
    """
    Fetch jobs from the buildapi running-jobs API and load them into the
    objectstore ingestion endpoint.
    """
    RunningJobsProcess().run()

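# ``fetch_buildapi_running`` is meant to run on a recurring schedule. A
# minimal sketch of wiring it into Celery beat, consistent with the
# pre-4.0 setting names used above (CELERY_ALWAYS_EAGER); the task name
# and the one-minute interval are assumptions, not the project's actual
# configuration.
from datetime import timedelta

CELERYBEAT_SCHEDULE = {
    'fetch-buildapi-running-every-minute': {
        # assumed registered task name for fetch_buildapi_running
        'task': 'fetch-buildapi-running',
        'schedule': timedelta(minutes=1),
    },
}
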