def test_correct_and_wrong_cluster_script_in_list(self):
    """A list holding a non-matching and a matching script yields the match.

    The first entry begins with a different job id (999999), the second
    begins with the requested job id (1234567); the second is expected.
    """
    other_job_script = "999999.dyn-dmp.x01xx012.16.sh"
    expected_script = "1234567.dyn-dmp.x99xx123.16.sh"
    # The non-matching entry comes first on purpose.
    candidates = [other_job_script, expected_script]

    found = get_cluster_script_from_list(
        job_id=1234567, file_list=candidates)

    self.assertEqual(found, expected_script)
def get_job_status_and_job_dir_from_sub_dir(job_id, sub_dir, recent=False):
    """
    Determine the job status and job directory of `job_id` from `sub_dir`.

    The content of `sub_dir` is listed and inspected in this order:

    1. a folder named ``<job_id>``           -> finished job
    2. a cluster script for the job          -> running job
    3. a folder named ``<job_id>.pending``   -> pending job
    4. a renamed job folder (must be a dir)  -> assumed finished job

    For a recent job, `sub_dir` is inspected up to 3 times with a pause of
    one second before every repeated check. The reason is that a job may
    switch from pending to running in such a way that the pending folder is
    already gone while the cluster script has not been created yet. Limiting
    this to recent jobs makes sense because this function is called soon
    after the submission (when the job is pending). For an old job without a
    pending folder, cluster script or finished folder, the job has probably
    been deleted or moved. Old jobs are typically only processed when the
    polling script is restarted and the information about previously
    checked joblogfiles is lost.

    Parameters
    ----------
    job_id : int
        Job id to check the status for
    sub_dir : str
        Path of the directory where the job was submitted
    recent : boolean
        Datetime in joblogfile points to recent submission. If `True`,
        `sub_dir` is checked up to 3 times for a `job_status`, otherwise
        only once. Default is False.

    Returns
    -------
    job_status : str
        Job status short string as defined in the Job model.
        ``Job.JOB_STATUS_NONE`` if nothing could be determined or if the
        determined job dir is not an existing directory.
    job_dir : None or str
        None if no job dir (or job_status) could be determined, otherwise
        the directory path where the job data is currently located.
    """
    logger = logging.getLogger(__name__).getChild(
        "get_job_status_and_job_dir_from_sub_dir")
    copy_logger_settings(__name__, "utils.caefileio.clusterscript")
    logger.info("Getting job_status and job_dir from sub_dir: {}".format(
        sub_dir))

    # Defaults; they stay None as long as nothing has been recognised.
    job_status = None
    job_dir = None

    # Only recent jobs are worth several looks at sub_dir.
    checks_limit = 3 if recent else 1

    for attempt in range(1, checks_limit + 1):
        if attempt == 1:
            logger.debug("Checking sub_dir for job_status and job_dir.")
        else:
            # Give the cluster a moment before looking again.
            time.sleep(1)
            logger.debug("Re-checking sub_dir for job_status and job_dir.")
        logger.debug(" {}/{} checks.".format(attempt, checks_limit))

        try:
            entries = os.listdir(sub_dir)
            logger.debug(
                "Content of sub_dir: {}".format(sorted(entries)))
        except NotADirectoryError as err_msg:
            logger.warning(f"sub_dir is not a directory: {err_msg}")
            continue
        except FileNotFoundError as err_msg:
            logger.info(f"sub_dir not found: {err_msg}")
            continue
        except PermissionError as err_msg:
            logger.info(f"No access to sub_dir: {err_msg}")
            continue
        except OSError as err_msg:
            logger.warning(
                f"OSError occurred. Not sure what causes it: {err_msg}")
            continue

        finished_name = str(job_id)
        pending_name = str(job_id) + ".pending"
        script_name = get_cluster_script_from_list(
            job_id=job_id, file_list=entries)

        # Checked in the order finished -> running -> pending, so that
        # leftovers of an earlier stage do not hide the current stage.
        if finished_name in entries:
            logger.debug("Finished job folder name in sub_dir!")
            job_status = Job.JOB_STATUS_FINISHED
            job_dir = os.path.join(sub_dir, finished_name)
        elif script_name is not None:
            job_status = Job.JOB_STATUS_RUNNING
            script_path = os.path.join(sub_dir, script_name)
            job_dir = get_cluster_scratch_dir_from_script(script_path)
        elif pending_name in entries:
            job_status = Job.JOB_STATUS_PENDING
            job_dir = os.path.join(sub_dir, pending_name)
            logger.debug(
                "Pending job folder name in sub dir: {}".format(job_dir))
        else:
            # Last resort: the job folder may have been renamed. This has
            # to come after the pending check because the folder name may
            # have been extended with anything.
            renamed_name = get_renamed_job_folder_from_list(
                job_id=job_id, file_list=entries)
            if renamed_name:
                renamed_path = os.path.join(sub_dir, renamed_name)
                logger.debug(
                    "Possibly found renamed job folder: {}".format(
                        renamed_name))
                if os.path.isdir(renamed_path):
                    logger.debug("Renamed job folder is dir: {}".format(
                        renamed_path))
                    logger.debug("Assuming finished job.")
                    job_status = Job.JOB_STATUS_FINISHED
                    job_dir = renamed_path

        if job_status is not None:
            if attempt > 1:
                logger.debug(
                    "=" * 80 + "\nRe-checking sub_dir is worth it!\n"
                    + ("=" * 80))
            # A status has been found, no further checks are needed.
            break

    if job_status is not None and job_dir is not None:
        if os.path.isdir(job_dir):
            logger.info("job_status determined from sub_dir: {}".format(
                job_status))
            logger.info("job_dir determined from sub_dir: {}".format(
                job_dir))
            return job_status, job_dir
        logger.error("Found job_dir is not a directory: {}".format(
            job_dir))

    logger.info("No job_status or job_dir could be determined from sub_dir")
    return Job.JOB_STATUS_NONE, None
def test_wrong_cluster_script_in_list(self):
    """A list holding only a script of another job id yields None."""
    candidates = ["999999.dyn-dmp.l01cl012.16.sh"]

    found = get_cluster_script_from_list(
        job_id=1234567, file_list=candidates)

    self.assertIsNone(found)
def test_another_correct_cluster_script_in_list(self):
    """A single script beginning with the requested job id is returned."""
    expected_script = "1234567.dyn-dmp.x99xx012.8.sh"

    found = get_cluster_script_from_list(
        job_id=1234567, file_list=[expected_script])

    self.assertEqual(found, expected_script)
def test_only_correct_cluster_script_in_list_pam_2(self):
    """A single ``pam-dmp`` script for the requested job id is returned."""
    expected_script = "1234567.pam-dmp.x12xx123.16.sh"

    found = get_cluster_script_from_list(
        job_id=1234567, file_list=[expected_script])

    self.assertEqual(found, expected_script)
def test_empty_list(self):
    """An empty file list yields None."""
    found = get_cluster_script_from_list(job_id=7654321, file_list=[])

    self.assertIsNone(found)