def run():
    """Find newly-confirmed jobs (status 'N'), prepare their Condor job
    specifications, and submit them to the Condor scheduler.

    On success the job is marked 'S' (submitted); on any preparation or
    submission failure it is marked 'E' (error), its directory is zipped up
    and the owner is notified by email.  All state is persisted through the
    Django ORM (``models.Job`` / ``models.CondorJob``).

    NOTE(review): this source arrived whitespace-mangled and with several
    spans replaced by '******' by a secret-scrubbing tool.  Spans marked
    "reconstructed" below were restored from context and must be confirmed
    against version control.
    """
    # Set up logging, with the appropriate log level
    logging.basicConfig(filename=settings.LOG_FILE,
                        level=settings.LOG_LEVEL,
                        format='%(asctime)s::%(levelname)s::%(message)s',
                        datefmt='%Y-%m-%d, %H:%M:%S')

    # Step one, load the jobs that have been confirmed, and need
    # submitting to condor
    new_jobs = models.Job.objects.filter(status='N')
    for job in new_jobs:
        # Reconstructed log line -- the '******' redaction swallowed the
        # username half of the concatenation; presumably job.user.username.
        logging.debug('New job found: ' + str(job.id) +
                      ', user: ' + str(job.user.username))
        try:
            # NOTE(review): the original construction of `model` (the COPASI
            # model wrapper that provides the prepare_*_jobs /
            # prepare_*_condor_jobs helpers used below) was destroyed by the
            # '******' redaction.  Restore it from version control before
            # running this daemon.

            # If rank's been set, use it. Otherwise set to 0.
            # BUG FIX: the original tested
            #   job.rank != None or job.rank != ''
            # which is a tautology (always true), so the '0' default was
            # unreachable; plain truthiness is what was intended.
            if job.rank:
                rank = job.rank
            else:
                rank = '0'

            # Dispatch on the job type to prepare the per-run Condor specs.
            if job.job_type == 'SO':
                condor_jobs = model.prepare_so_condor_jobs(rank=rank)
            elif job.job_type == 'SS':
                no_of_jobs = model.prepare_ss_task(
                    job.runs, skip_load_balancing=job.skip_load_balancing)
                condor_jobs = model.prepare_ss_condor_jobs(no_of_jobs, rank=rank)
            elif job.job_type == 'PS':
                no_of_jobs = model.prepare_ps_jobs(
                    skip_load_balancing=job.skip_load_balancing)
                condor_jobs = model.prepare_ps_condor_jobs(no_of_jobs, rank=rank)
            elif job.job_type == 'OR':
                no_of_jobs = model.prepare_or_jobs(
                    job.runs, skip_load_balancing=job.skip_load_balancing)
                condor_jobs = model.prepare_or_condor_jobs(no_of_jobs, rank=rank)
            elif job.job_type == 'PR':
                no_of_jobs = model.prepare_pr_jobs(
                    job.runs,
                    skip_load_balancing=job.skip_load_balancing,
                    custom_report=job.custom_report)
                condor_jobs = model.prepare_pr_condor_jobs(no_of_jobs, rank=rank)
            elif job.job_type == 'SP':
                ##ALTER
                no_of_jobs = model.prepare_sp_jobs(
                    job.runs,
                    skip_load_balancing=job.skip_load_balancing,
                    custom_report=False)
                condor_jobs = model.prepare_sp_condor_jobs(no_of_jobs, rank=rank)
            elif job.job_type == 'OD':
                # No need to prepare the job. This was done as the job was
                # submitted
                condor_jobs = model.prepare_od_condor_jobs(rank=rank)
            elif job.job_type == 'RW':
                no_of_jobs = model.prepare_rw_jobs(job.runs)
                condor_jobs = model.prepare_rw_condor_jobs(
                    no_of_jobs, job.raw_mode_args, rank=rank)
            else:
                # Unknown job type: leave it untouched for a later pass.
                continue

            for cj in condor_jobs:
                try:
                    condor_job_id = condor_submit(
                        cj['spec_file'], username=str(job.user.username))
                    # Check that the condor job was submitted successfully
                    assert condor_job_id != -1
                    condor_job = models.CondorJob(
                        parent=job,
                        spec_file=cj['spec_file'],
                        std_output_file=cj['std_output_file'],
                        std_error_file=cj['std_error_file'],
                        log_file=cj['log_file'],
                        job_output=cj['job_output'],
                        queue_status='Q',
                        queue_id=condor_job_id)
                    condor_job.save()
                    # Keep a running count of condor jobs on the parent.
                    if job.condor_jobs is None:
                        job.condor_jobs = 1
                    else:
                        job.condor_jobs += 1
                except:
                    logging.exception(
                        'Error submitting job(s) to Condor; ensure condor '
                        'scheduler service is running. Job: ' + str(job.id) +
                        ', User: ' + str(job.user.username))
                    # BUG FIX (reconstructed span): without re-raising here a
                    # failed submission still fell through and marked the job
                    # 'S' below; propagate so the outer handler marks it 'E'.
                    raise
            logging.debug('Submitted ' + str(len(condor_jobs)) + ' to Condor')
            job.status = 'S'
            job.last_update = datetime.datetime.today()
            job.save()
        except Exception as e:
            # Preparation/submission failed: record the error state, archive
            # the job directory (best effort) and notify the owner.
            logging.warning(
                'Error preparing job for condor submission. Job: ' +
                str(job.id) + ', User: ' + str(job.user.username))
            logging.warning('Exception: ' + str(e))
            job.status = 'E'
            job.last_update = datetime.datetime.today()
            job.finish_time = datetime.datetime.today()
            try:
                zip_up_dir(job)
            except:
                logging.exception(
                    'Exception: could not zip up job directory for job ' +
                    str(job.id))
            job.save()
            try:
                email_notify.send_email(job)
            except:
                logging.exception('Exception: error sending email')
def run():
    """Submit each newly-confirmed job (status 'N') to Condor.

    For every pending ``models.Job`` this prepares the appropriate Condor
    specification files for its job type, submits each spec via
    ``condor_submit``, records a ``models.CondorJob`` row per submission and
    flips the job to 'S'.  Any failure flips the job to 'E', zips up its
    working directory and emails the owner.

    NOTE(review): this is the second, autopep8-reformatted copy of the same
    ``run`` function in this chunk; the '******' redaction artifacts from the
    secret scrubber were reconstructed below (marked) and must be confirmed
    against version control.
    """
    # Set up logging, with the appropriate log level
    logging.basicConfig(filename=settings.LOG_FILE,
                        level=settings.LOG_LEVEL,
                        format='%(asctime)s::%(levelname)s::%(message)s',
                        datefmt='%Y-%m-%d, %H:%M:%S')

    # Step one, load the jobs that have been confirmed, and need
    # submitting to condor
    for job in models.Job.objects.filter(status='N'):
        # Reconstructed: the username portion of this log line was redacted.
        logging.debug('New job found: ' + str(job.id) +
                      ', user: ' + str(job.user.username))
        try:
            # NOTE(review): the `model` construction (the wrapper object whose
            # prepare_*_jobs / prepare_*_condor_jobs methods are called below)
            # was lost to the '******' redaction -- restore from VCS.

            # If rank's been set, use it. Otherwise set to 0.
            # BUG FIX: `job.rank != None or job.rank != ''` is always true,
            # making the default unreachable; truthiness is what was meant.
            rank = job.rank if job.rank else '0'

            if job.job_type == 'SO':
                condor_jobs = model.prepare_so_condor_jobs(rank=rank)
            elif job.job_type == 'SS':
                no_of_jobs = model.prepare_ss_task(
                    job.runs, skip_load_balancing=job.skip_load_balancing)
                condor_jobs = model.prepare_ss_condor_jobs(no_of_jobs,
                                                           rank=rank)
            elif job.job_type == 'PS':
                no_of_jobs = model.prepare_ps_jobs(
                    skip_load_balancing=job.skip_load_balancing)
                condor_jobs = model.prepare_ps_condor_jobs(no_of_jobs,
                                                           rank=rank)
            elif job.job_type == 'OR':
                no_of_jobs = model.prepare_or_jobs(
                    job.runs, skip_load_balancing=job.skip_load_balancing)
                condor_jobs = model.prepare_or_condor_jobs(no_of_jobs,
                                                           rank=rank)
            elif job.job_type == 'PR':
                no_of_jobs = model.prepare_pr_jobs(
                    job.runs,
                    skip_load_balancing=job.skip_load_balancing,
                    custom_report=job.custom_report)
                condor_jobs = model.prepare_pr_condor_jobs(no_of_jobs,
                                                           rank=rank)
            elif job.job_type == 'SP':
                ##ALTER
                no_of_jobs = model.prepare_sp_jobs(
                    job.runs,
                    skip_load_balancing=job.skip_load_balancing,
                    custom_report=False)
                condor_jobs = model.prepare_sp_condor_jobs(no_of_jobs,
                                                           rank=rank)
            elif job.job_type == 'OD':
                # No need to prepare the job. This was done as the job was
                # submitted
                condor_jobs = model.prepare_od_condor_jobs(rank=rank)
            elif job.job_type == 'RW':
                no_of_jobs = model.prepare_rw_jobs(job.runs)
                condor_jobs = model.prepare_rw_condor_jobs(
                    no_of_jobs, job.raw_mode_args, rank=rank)
            else:
                # Unrecognised job type -- skip it.
                continue

            for cj in condor_jobs:
                try:
                    condor_job_id = condor_submit(
                        cj['spec_file'], username=str(job.user.username))
                    # Check that the condor job was submitted successfully
                    assert condor_job_id != -1
                    models.CondorJob(parent=job,
                                     spec_file=cj['spec_file'],
                                     std_output_file=cj['std_output_file'],
                                     std_error_file=cj['std_error_file'],
                                     log_file=cj['log_file'],
                                     job_output=cj['job_output'],
                                     queue_status='Q',
                                     queue_id=condor_job_id).save()
                    # Track how many condor jobs back this parent job.
                    if job.condor_jobs is None:
                        job.condor_jobs = 1
                    else:
                        job.condor_jobs += 1
                except:
                    logging.exception(
                        'Error submitting job(s) to Condor; ensure condor '
                        'scheduler service is running. Job: ' + str(job.id) +
                        ', User: ' + str(job.user.username))
                    # BUG FIX (reconstructed span): re-raise so a failed
                    # submission cannot fall through to the 'S' status below.
                    raise
            logging.debug('Submitted ' + str(len(condor_jobs)) + ' to Condor')
            job.status = 'S'
            job.last_update = datetime.datetime.today()
            job.save()
        except Exception as e:
            # Record the failure, archive the work dir (best effort) and
            # notify the owner by email (also best effort).
            logging.warning(
                'Error preparing job for condor submission. Job: ' +
                str(job.id) + ', User: ' + str(job.user.username))
            logging.warning('Exception: ' + str(e))
            job.status = 'E'
            job.last_update = datetime.datetime.today()
            job.finish_time = datetime.datetime.today()
            try:
                zip_up_dir(job)
            except:
                logging.exception(
                    'Exception: could not zip up job directory for job ' +
                    str(job.id))
            job.save()
            try:
                email_notify.send_email(job)
            except:
                logging.exception('Exception: error sending email')
elif condor_job.queue_status == 'E': logging.warning('Condor job id ' + str(condor_job.queue_id) + ' held') error = True break if error: logging.warning('Job: ' + str(job.id) + ', User: '******' did not complete successfully') job.status='E' job.finish_time=datetime.datetime.today() job.last_update=datetime.datetime.today() try: zip_up_dir(job) except: logging.exception('Exception: could not zip up job directory for job ' + str(job.id)) job.save() try: email_notify.send_email(job) except: logging.exception('Exception: error sending email') elif not still_running: logging.debug('Job ' + str(job.id) + ', User: '******' finished processing on condor') #Open the log file and check the exit status failed_job_count = 0 #keep a count of the total run time for the job total_run_time = 0.0 for condor_job in condor_jobs: try: filename=os.path.join(condor_job.getDirectory(), condor_job.log_file) log = condor_log.Log(filename) assert log.termination_status == 0 #While we're here, update the CondorJob run time
if error: logging.warning('Job: ' + str(job.id) + ', User: '******' did not complete successfully') job.status = 'E' job.finish_time = datetime.datetime.today() job.last_update = datetime.datetime.today() try: zip_up_dir(job) except: logging.exception( 'Exception: could not zip up job directory for job ' + str(job.id)) job.save() try: email_notify.send_email(job) except: logging.exception('Exception: error sending email') elif not still_running: logging.debug('Job ' + str(job.id) + ', User: '******' finished processing on condor') #Open the log file and check the exit status failed_job_count = 0 #keep a count of the total run time for the job total_run_time = 0.0 for condor_job in condor_jobs: try: filename = os.path.join(condor_job.getDirectory(), condor_job.log_file) log = condor_log.Log(filename)