def get_queryset(self, request):
    """Limit the admin changelist to jobs that have not yet finished."""
    finished_states = [
        Failed.instance().name,
        Completed.instance().name,
        CompletedWithWarning.instance().name,
    ]
    base_qs = super(RunningJobAdmin, self).get_queryset(request)
    return base_qs.exclude(state__in=finished_states)
def run_all_jobs(first_run=True):
    """Run every unfinished job sequentially, in id order, and tally outcomes.

    Args:
        first_run: passed straight through to ``JobStatemachine.run``.

    Returns:
        A tuple ``(succeed_jobs, failed_jobs, ignored_jobs, error_jobs)``.
    """
    succeed_jobs = 0
    failed_jobs = 0
    ignored_jobs = 0
    error_jobs = 0
    for j in Job.objects.exclude(
            state__in=[Failed.instance().name, Completed.instance().name]).order_by('id'):
        try:
            JobStatemachine.run(j, first_run)
            if j.state == "Completed":
                # A completed job that was never launched did no real work.
                if j.launched is None:
                    ignored_jobs += 1
                else:
                    succeed_jobs += 1
            elif j.state == "Failed":
                failed_jobs += 1
            else:
                error_jobs += 1
        except Exception:
            # Was a bare ``except:``; narrowed so KeyboardInterrupt/SystemExit
            # abort the run instead of being counted as job errors.
            logger.error(
                "job(id={0},name={1}) runs into a exception{2}".format(
                    j.id, j.publish.name, JobState.get_exception_message()))
            error_jobs += 1
    return (succeed_jobs, failed_jobs, ignored_jobs, error_jobs)
def run_all_jobs(first_run=True):
    """Run every unfinished job sequentially, in id order, and tally outcomes.

    Args:
        first_run: passed straight through to ``JobStatemachine.run``.

    Returns:
        A tuple ``(succeed_jobs, failed_jobs, ignored_jobs, error_jobs)``.
    """
    succeed_jobs = 0
    failed_jobs = 0
    ignored_jobs = 0
    error_jobs = 0
    unfinished = Job.objects.exclude(
        state__in=[Failed.instance().name, Completed.instance().name]).order_by('id')
    for j in unfinished:
        try:
            JobStatemachine.run(j, first_run)
            if j.state == "Completed":
                # Completed but never launched means nothing was actually done.
                if j.launched is None:
                    ignored_jobs += 1
                else:
                    succeed_jobs += 1
            elif j.state == "Failed":
                failed_jobs += 1
            else:
                error_jobs += 1
        except Exception:
            # Was a bare ``except:``; narrowed so KeyboardInterrupt/SystemExit
            # abort the run instead of being counted as job errors.
            logger.error("job(id={0},name={1}) runs into a exception{2}".format(
                j.id, j.publish.name, JobState.get_exception_message()))
            error_jobs += 1
    return (succeed_jobs, failed_jobs, ignored_jobs, error_jobs)
def get_queryset(self, request):
    """Limit the changelist to launched jobs that have not yet finished."""
    finished_states = [
        Failed.instance().name,
        Completed.instance().name,
        CompletedWithWarning.instance().name,
    ]
    base_qs = super(EffectiveJobAdmin, self).get_queryset(request)
    return base_qs.exclude(state__in=finished_states, launched=None)
def sync_status(self, o):
    """Render a 'Sync status' link for launched jobs that finished (with or without warnings)."""
    finished_states = [
        Completed.instance().name,
        CompletedWithWarning.instance().name,
    ]
    if o.state in finished_states and o.launched:
        return "<a href='/monitor/publishsyncstatus/?q={0}'>Sync status</a>".format(o.id)
    return ""
def run_all_jobs(first_run=True):
    """Run every job that is not already failed or completed, sequentially in id order.

    Args:
        first_run: passed straight through to ``JobStatemachine.run``.
    """
    unfinished = Job.objects.exclude(
        state__in=[Failed.instance().name, Completed.instance().name]).order_by('id')
    for j in unfinished:
        try:
            JobStatemachine.run(j, first_run)
        except Exception:
            # Was a bare ``except:``; narrowed so KeyboardInterrupt/SystemExit
            # abort the run instead of being silently logged.
            logger.error("job(id={0},name={1}) runs into a exception{2}".format(
                j.id, j.publish.name, JobState.get_exception_message()))
def clean(self):
    """Delete outdated jobs while preserving recent successful ones.

    For every publish that has been published at least once, keep at least
    ``self.min_jobs`` latest successful launched jobs; older jobs are
    deleted, restricted to those finished before ``self.expire_days`` ago
    when that attribute is set.  Jobs still referenced by an ``Input`` or
    ``Normalise`` row are never deleted.
    """
    outdated_date = None
    if self.expire_days:
        outdated_date = timezone.now() - timedelta(self.expire_days)
        self.logger.info(
            "Begin to clean the jobs finished before {0}, but at least {1} latest successful jobs for each publish will be preserved."
            .format(outdated_date, self.min_jobs))
    else:
        # Fixed placeholder: was "{1}" with an unused first positional arg.
        self.logger.info(
            "Begin to clean all jobs, except {0} latest successful jobs for each publish"
            .format(self.min_jobs))
    deleted_jobs = 0
    # Only consider publishes which have been published at least once.
    for p in Publish.objects.filter(job_id__isnull=False):
        # Find the earliest successful launched job that must be kept.
        try:
            earliest_job = p.job_set.filter(
                state=Completed.instance().name,
                launched__isnull=False).order_by('-finished')[self.min_jobs - 1]
        except IndexError:
            # Fewer than min_jobs successful jobs exist; nothing to clean.
            continue
        jobs = p.job_set.filter(pk__lt=earliest_job.pk)
        if self.expire_days:
            # When expire days are specified, only expired jobs are deleted.
            jobs = jobs.filter(finished__lt=outdated_date)
        for j in jobs:
            # Jobs still referenced by Input or Normalise cannot be deleted.
            if Input.objects.filter(
                    job_id=j.pk).exists() or Normalise.objects.filter(
                        job_id=j.pk).exists():
                continue
            j.delete()
            deleted_jobs += 1
            self.logger.debug("Delete outdated job({0})".format(j.pk))
    if deleted_jobs == 1:
        self.logger.info(
            "{0} outdated job has been deleted.".format(deleted_jobs))
    elif deleted_jobs > 1:
        # Fixed pluralization: "jobs have been", not "job have been".
        self.logger.info(
            "{0} outdated jobs have been deleted.".format(deleted_jobs))
    else:
        self.logger.info("Not find any outdated jobs.")
def run_all_jobs(first_run=True):
    """Run all jobs that are neither failed nor completed, sequentially in id order.

    Args:
        first_run: passed straight through to ``JobStatemachine.run``.
    """
    pending_jobs = Job.objects.exclude(
        state__in=[Failed.instance().name,
                   Completed.instance().name]).order_by('id')
    for j in pending_jobs:
        try:
            JobStatemachine.run(j, first_run)
        except Exception:
            # Was a bare ``except:``; narrowed so KeyboardInterrupt/SystemExit
            # abort the run instead of being silently logged.
            logger.error(
                "job(id={0},name={1}) runs into a exception{2}".format(
                    j.id, j.publish.name, JobState.get_exception_message()))
def clean(self):
    """Delete outdated jobs and return how many were removed.

    For every publish that has been published at least once, keep at least
    ``self.min_jobs`` latest successful launched jobs; older jobs are
    deleted, restricted to those finished before ``self.expire_days`` ago
    when that attribute is set.  Jobs still referenced by an ``Input`` or
    ``Normalise`` row are never deleted.  Jobs without any publish that
    are older than 7 days are also removed.

    Returns:
        The number of deleted jobs.
    """
    outdated_date = None
    if self.expire_days:
        outdated_date = timezone.localtime(timezone.now()) - timedelta(self.expire_days)
        self.logger.info(
            "Begin to clean the jobs finished before {0}, but at least {1} latest successful jobs for each publish will be preserved."
            .format(outdated_date, self.min_jobs))
    else:
        # Fixed placeholder: was "{1}" with an unused first positional arg.
        self.logger.info(
            "Begin to clean all jobs, except {0} latest successful jobs for each publish"
            .format(self.min_jobs))
    deleted_jobs = 0
    # Only consider publishes which have been published at least once.
    for p in Publish.objects.filter(job_id__isnull=False):
        # Find the earliest successful launched job that must be kept.
        try:
            earliest_job = p.job_set.filter(
                state=Completed.instance().name,
                launched__isnull=False).order_by('-finished')[self.min_jobs - 1]
        except IndexError:
            # Fewer than min_jobs successful jobs exist; nothing to clean.
            continue
        jobs = p.job_set.filter(pk__lt=earliest_job.pk)
        if self.expire_days:
            # When expire days are specified, only expired jobs are deleted.
            jobs = jobs.filter(finished__lt=outdated_date)
        for j in jobs:
            # Jobs still referenced by Input or Normalise cannot be deleted.
            if Input.objects.filter(job_id=j.pk).exists() or Normalise.objects.filter(job_id=j.pk).exists():
                continue
            j.delete()
            deleted_jobs += 1
            self.logger.debug("Delete outdated job({0})".format(j.pk))
    # Also remove week-old jobs that no longer belong to any publish.
    outdated_date = timezone.now() - timedelta(7)
    for j in Job.objects.filter(publish__isnull=True, created__lt=outdated_date):
        j.delete()
        deleted_jobs += 1
        self.logger.debug("Delete outdated job({0})".format(j.pk))
    if deleted_jobs == 1:
        self.logger.info("{0} outdated job has been deleted.".format(deleted_jobs))
    elif deleted_jobs > 1:
        # Fixed pluralization: "jobs have been", not "job have been".
        self.logger.info("{0} outdated jobs have been deleted.".format(deleted_jobs))
    else:
        self.logger.info("Not find any outdated jobs.")
    return deleted_jobs
def sync_status(self, o):
    """Render a 'Sync status' link for jobs that completed and were launched."""
    if o.state == Completed.instance().name and o.launched:
        return "<a href='/monitor/publishsyncstatus/?q={0}'>Sync status</a>".format(o.id)
    return ""
def get_queryset(self, request):
    """Exclude finished jobs that were never launched from the changelist."""
    excluded_states = [Failed.instance().name, Completed.instance().name]
    base_qs = super(EffectiveJobAdmin, self).get_queryset(request)
    return base_qs.exclude(state__in=excluded_states, launched=None)
def get_queryset(self, request):
    """Show only jobs that are neither failed nor completed."""
    excluded_states = [Failed.instance().name, Completed.instance().name]
    base_qs = super(RunningJobAdmin, self).get_queryset(request)
    return base_qs.exclude(state__in=excluded_states)
def execute(self, job, previous_state):
    """Check whether this harvest job may proceed or must keep waiting.

    The job keeps waiting (outcome ``failed``) when:
    1. the publish is still in a running harvest;
    2. a dependent input failed with the same batch id and the failing
       job is still running;
    3. a dependent normalise failed with the same batch id and the
       failing job is still running;
    4. a dependent input was harvested by jobs with a different batch_id
       that are still running.

    Returns:
        A ``(HarvestStateOutcome, message)`` tuple.
    """
    # Hoisted: the "job is finished" state names used by all checks below.
    finished_states = [
        Failed.instance().name,
        Completed.instance().name,
        CompletedWithWarning.instance().name,
    ]
    if job.publish.running > 0:
        # A harvest job for the same publish is still running.
        return (HarvestStateOutcome.failed,
                "Harvest job for the same publish is still running.")
    result = None
    # If some input already failed, the job will continue to wait.
    for o in job.inputs:
        if o.job_batch_id and o.job_batch_id == job.batch_id:
            # Input already executed by a job belonging to the same batch.
            if not o.job_status:
                # Execution failed: check whether the failing job finished.
                try:
                    j = Job.objects.get(pk=o.job_id)
                    if j.state not in finished_states:
                        # The failing job is still running; the current job
                        # must wait until it succeeds or is cancelled.
                        result = (HarvestStateOutcome.failed, o.job_message)
                        break
                except Exception:
                    # Failing job cannot be found; current job can execute.
                    # (Was a bare ``except:``; narrowed.)
                    pass
        elif o.job_batch_id:
            # Input executed by a job belonging to a different batch.
            dependent_jobs = []
            for j in Job.objects.filter(
                    batch_id=o.job_batch_id).exclude(state__in=finished_states):
                for i in j.inputs:
                    if i.id == o.id:
                        # Input still used by another running job.
                        dependent_jobs.append({
                            "id": j.id,
                            "batch_id": j.batch_id,
                            "publish": j.publish.table_name,
                            "state": j.state
                        })
            if dependent_jobs:
                # Running harvest jobs still depend on the imported data;
                # the current job must wait until they all finish.
                result = (
                    HarvestStateOutcome.failed,
                    "The dependent input {0} is still used by running jobs {1}"
                    .format(o.name, dependent_jobs))
                break
    if result:
        # Already failed.
        return result
    # If some normalise already failed, the job will continue to wait.
    for o in job.normalises:
        if o.job_batch_id and o.job_batch_id == job.batch_id and not o.job_status:
            try:
                j = Job.objects.get(pk=o.job_id)
                if j.state not in finished_states:
                    # Failing job still running; keep waiting.
                    result = (HarvestStateOutcome.failed, o.job_message)
                    break
            except Exception:
                # Failing job cannot be found; current job can execute.
                pass
    if not result:
        result = (HarvestStateOutcome.succeed, None)
        if job.publish.is_up_to_date(job):
            # Publish is up to date, no need to run...
            if job.is_manually_created:
                # ...unless the custodian forced it.
                result = (HarvestStateOutcome.succeed,
                          "Publish is up to date, but forced by custodian")
            else:
                return (HarvestStateOutcome.up_to_date,
                        "Publish is up to date, no need to publish again.")
    return result
def execute(self, job, previous_state):
    """Import each outdated input table for this job.

    An input table is skipped if it was already imported successfully with
    the same batch id; the outcome is ``failed`` when a dependent import
    failed for the same batch and the failing job is still running.

    Returns:
        A ``(HarvestStateOutcome, message)`` tuple.
    """
    self._pre_execute(job, previous_state)
    result = None
    job_state = None
    # Go through all outdated input tables to import.
    for o in self._input_tables(job, previous_state):
        if o.job_batch_id and o.job_batch_id == job.batch_id:
            # Input table already executed by a job belonging to the same batch.
            job_state = HarvestState.get_jobstate(o.job_state)
            if job_state == self:
                # This input table is on the same state.
                if o.job_status:
                    # Already executed successfully; nothing to do.
                    continue
                elif o.job_id == job.id:
                    # Failed by this very job: execute it again below.
                    pass
                else:
                    # Failed by another job: check whether that job is still
                    # running or already finished.
                    try:
                        j = Job.objects.get(pk=o.job_id)
                        if j.state in [
                                Failed.instance().name,
                                Completed.instance().name,
                                CompletedWithWarning.instance().name
                        ]:
                            # The failing job has finished (failed or
                            # completed); the current job can execute again.
                            pass
                        else:
                            # The failing job is still running; the current
                            # job must wait until it is cancelled or succeeds.
                            result = (HarvestStateOutcome.failed, o.job_message)
                            break
                    except:
                        # Failing job cannot be found; current job can
                        # execute again.
                        pass
            elif self.is_upstate(job_state):
                # The input table is on a state *after* the current one, so
                # this state must already have executed successfully.
                continue
            else:
                # The input table is on a state *before* the current one.
                if o.job_status:
                    # Previous state succeeded: execute the current state.
                    pass
                else:
                    # In general it is impossible to reach here, because the
                    # logic only gets this far when the previous state has
                    # executed successfully.
                    result = (HarvestStateOutcome.failed, o.job_message)
                    break
        # Execute the import for this input table.
        try:
            result = self._execute(job, previous_state, o)
            if result and result[0] != JobStateOutcome.succeed:
                # Failed: record the failure on the input table.
                o.job_status = False
                o.job_message = result[1]
                break
            else:
                # Record success on the input table so other jobs do not
                # execute it again.
                o.job_status = True
                o.job_message = result[
                    1] if result and result[1] else 'Succeed'
        except KeyboardInterrupt:
            # Shutdown requested (Ctrl-C): record and stop processing.
            result = (HarvestStateOutcome.shutdown,
                      self.get_exception_message())
            o.job_status = False
            o.job_message = result[1]
            break
        except SystemExit:
            # Shutdown requested (sys.exit): record and stop processing.
            result = (HarvestStateOutcome.shutdown,
                      self.get_exception_message())
            o.job_status = False
            o.job_message = result[1]
            break
        except:
            # Any other error marks this input table as failed.
            result = (HarvestStateOutcome.failed,
                      self.get_exception_message())
            o.job_status = False
            o.job_message = result[1]
            break
        finally:
            # Always persist the execution bookkeeping on the input table,
            # even on failure or shutdown, so other jobs see the outcome.
            o.job_state = self.name
            o.job_batch_id = job.batch_id
            o.job_id = job.id
            o.save(update_fields=[
                'job_state', 'job_status', 'job_message', 'job_batch_id',
                'job_id'
            ])
    if not result:
        result = (HarvestStateOutcome.succeed, None)
    return result
def execute(self, job, previous_state):
    """Check whether this harvest job may proceed or must keep waiting.

    The job keeps waiting (outcome ``failed``) when:
    1. the publish is still in a running harvest;
    2. a dependent input failed with the same batch id and the failing
       job is still running;
    3. a dependent normalise failed with the same batch id and the
       failing job is still running;
    4. a dependent input was harvested by jobs with a different batch_id
       that are still running.

    Returns:
        A ``(HarvestStateOutcome, message)`` tuple.
    """
    # Hoisted: the "job is finished" state names used by all checks below.
    finished_states = [Failed.instance().name, Completed.instance().name]
    if job.publish.running > 0:
        # A harvest job for the same publish is still running.
        return (HarvestStateOutcome.failed,
                "Harvest job for the same publish is still running.")
    result = None
    # If some input already failed, the job will continue to wait.
    for o in job.inputs:
        if o.job_batch_id and o.job_batch_id == job.batch_id:
            # Input already executed by a job belonging to the same batch.
            if not o.job_status:
                # Execution failed: check whether the failing job finished.
                try:
                    j = Job.objects.get(pk=o.job_id)
                    if j.state not in finished_states:
                        # Failing job still running; the current job must
                        # wait until it succeeds or is cancelled.
                        result = (HarvestStateOutcome.failed, o.job_message)
                        break
                except Exception:
                    # Failing job cannot be found; current job can execute.
                    # (Was a bare ``except:``; narrowed.)
                    pass
        elif o.job_batch_id:
            # Input executed by a job belonging to a different batch.
            dependent_jobs = []
            for j in Job.objects.filter(
                    batch_id=o.job_batch_id).exclude(state__in=finished_states):
                for i in j.inputs:
                    if i.id == o.id:
                        # BUG FIX: record the dependent *job*'s details, not
                        # the input's (inputs carry no ``state`` attribute,
                        # so the old ``i.state`` access would blow up).
                        dependent_jobs.append({
                            "id": j.id,
                            "publish": j.publish.name,
                            "state": j.state
                        })
            if dependent_jobs:
                # Running harvest jobs still depend on the imported data;
                # the current job must wait until they all finish.
                result = (
                    HarvestStateOutcome.failed,
                    "The dependent input {0} is still used by running jobs {1}".format(
                        o.name, dependent_jobs))
                break
    if result:
        # Already failed.
        return result
    # If some normalise already failed, the job will continue to wait.
    for o in job.normalises:
        if o.job_batch_id and o.job_batch_id == job.batch_id and not o.job_status:
            try:
                j = Job.objects.get(pk=o.job_id)
                if j.state not in finished_states:
                    # Failing job still running; keep waiting.
                    result = (HarvestStateOutcome.failed, o.job_message)
                    break
            except Exception:
                # Failing job cannot be found; current job can execute.
                pass
    if not result:
        result = (HarvestStateOutcome.succeed, None)
        if job.publish.is_up_to_date(job):
            # Publish is up to date, no need to run...
            if job.is_manually_created:
                # ...unless the custodian forced it.
                result = (HarvestStateOutcome.succeed,
                          "Publish is up to date, but forced by custodian")
            else:
                return (HarvestStateOutcome.up_to_date,
                        "Publish is up to date, no need to publish again.")
    return result
def execute(self, job, previous_state):
    """Import each outdated input table for this job.

    An input table is skipped if it was already imported successfully with
    the same batch id; the outcome is ``failed`` when a dependent import
    failed for the same batch and the failing job is still running.

    Returns:
        A ``(HarvestStateOutcome, message)`` tuple.
    """
    self._pre_execute(job, previous_state)
    result = None
    # Go through all outdated input tables to import.
    for o in self._input_tables(job, previous_state):
        if o.job_batch_id and o.job_batch_id == job.batch_id:
            # Input table already executed by a job of the same batch.
            job_state = HarvestState.get_jobstate(o.job_state)
            if job_state == self:
                # This input table is on the same state.
                if o.job_status:
                    # Already executed successfully; nothing to do.
                    continue
                elif o.job_id == job.id:
                    # Failed by this very job: execute it again below.
                    pass
                else:
                    # Failed by another job: wait if that job still runs.
                    try:
                        j = Job.objects.get(pk=o.job_id)
                        if j.state not in [Failed.instance().name,
                                           Completed.instance().name]:
                            # Failing job still running; the current job
                            # must wait until it is cancelled or succeeds.
                            result = (HarvestStateOutcome.failed, o.job_message)
                            break
                    except Exception:
                        # Failing job cannot be found; current job can
                        # execute again. (Was a bare ``except:``; narrowed.)
                        pass
            elif self.is_upstate(job_state):
                # Input table is past this state; it already succeeded.
                continue
            else:
                # Input table is on a state before the current one.
                if not o.job_status:
                    # In general it is impossible to reach here, because the
                    # logic only gets this far when the previous state has
                    # executed successfully.
                    result = (HarvestStateOutcome.failed, o.job_message)
                    break
        # Execute the import for this input table.
        try:
            result = self._execute(job, previous_state, o)
            if result and result[0] != JobStateOutcome.succeed:
                # Failed: record the failure on the input table.
                o.job_status = False
                o.job_message = result[1]
                break
            else:
                # Record success so other jobs do not execute it again.
                o.job_status = True
                # BUG FIX: preserve any success message returned by
                # _execute instead of always overwriting it with 'Succeed'.
                o.job_message = result[1] if result and result[1] else 'Succeed'
        except:
            # NOTE(review): this bare except also traps KeyboardInterrupt /
            # SystemExit and records them as input failures — consider a
            # dedicated shutdown outcome; kept as-is to preserve behavior.
            result = (HarvestStateOutcome.failed, self.get_exception_message())
            o.job_status = False
            o.job_message = result[1]
            break
        finally:
            # Always persist the execution bookkeeping on the input table,
            # even on failure, so other jobs see the outcome.
            o.job_state = self.name
            o.job_batch_id = job.batch_id
            o.job_id = job.id
            o.save(update_fields=['job_state', 'job_status', 'job_message',
                                  'job_batch_id', 'job_id'])
    if not result:
        result = (HarvestStateOutcome.succeed, None)
    return result