def submit_jobs(self, username, password, server, project, subject, session,
                structural_reference_project, structural_reference_session,
                put_server, clean_output_resource_first, setup_script,
                incomplete_only, scan, walltime_limit_hours,
                mem_limit_gbs,  # UNUSED
                vmem_limit_gbs):

    subject_info = hcp7t_subject.Hcp7TSubjectInfo(
        project, structural_reference_project, subject)

    # determine names of preprocessed resting state scans that are
    # available for the subject
    preproc_resting_state_scan_names = self.archive.available_resting_state_preproc_names(
        subject_info)
    inform("Preprocessed resting state scans available for subject: " +
           str(preproc_resting_state_scan_names))

    # determine names of the preprocessed MOVIE task scans that are available for the subject
    preproc_movie_scan_names = self.archive.available_movie_preproc_names(
        subject_info)
    inform("Preprocessed movie scans available for subject " +
           str(preproc_movie_scan_names))

    # determine names of the FIX processed scans that are available for the subject
    fix_processed_scan_names = self.archive.available_FIX_processed_names(
        subject_info)
    inform("FIX processed scans available for subject " +
           str(fix_processed_scan_names))

    # build list of scans to process
    scan_list = []
    if scan is None:
        scan_list = fix_processed_scan_names
    else:
        scan_list.append(scan)

    # process specified scans
    for scan_name in scan_list:

        if incomplete_only:
            completion_checker = PostFixHCP7T_OneSubjectCompletionChecker.PostFixHCP7T_OneSubjectCompletionChecker()
            if completion_checker.is_processing_complete(self.archive, subject_info, scan_name):
                inform("scan: " + scan_name + " has already completed PostFixHCP7T processing")
                inform("Only submitting jobs for incomplete scans - skipping " + scan_name)
                continue

        inform("scan_name: " + scan_name)
        long_scan_name = self.archive.functional_scan_long_name(scan_name)
        output_resource_name = self.archive.PostFix_processed_resource_name(scan_name)

        inform("")
        inform("-------------------------------------------------")
        inform("Submitting jobs for scan: " + long_scan_name)
        inform("Output resource name: " + output_resource_name)
        inform("-------------------------------------------------")
        inform("")

        # make sure working directories don't have the same name based on the
        # same start time by sleeping a few seconds
        time.sleep(5)
        current_seconds_since_epoch = int(time.time())

        working_directory_name = self.build_home
        working_directory_name += os.sep + project
        working_directory_name += os.sep + self.PIPELINE_NAME
        working_directory_name += '.' + subject
        working_directory_name += '.' + long_scan_name
        working_directory_name += '.' + str(current_seconds_since_epoch)

        # make the working directory
        inform("Making working directory: " + working_directory_name)
        os.makedirs(name=working_directory_name)

        # get JSESSION ID
        jsession_id = xnat_access.get_jsession_id(
            server=os_utils.getenv_required('XNAT_PBS_JOBS_XNAT_SERVER'),
            username=username,
            password=password)
        inform("jsession_id: " + jsession_id)

        # get XNAT Session ID (a.k.a. the experiment ID, e.g. ConnectomeDB_E1234)
        xnat_session_id = xnat_access.get_session_id(
            server=os_utils.getenv_required('XNAT_PBS_JOBS_XNAT_SERVER'),
            username=username,
            password=password,
            project=project,
            subject=subject,
            session=session)
        inform("xnat_session_id: " + xnat_session_id)

        # get XNAT Workflow ID
        workflow_obj = xnat_access.Workflow(username, password, server, jsession_id)
        workflow_id = workflow_obj.create_workflow(
            xnat_session_id, project, self.PIPELINE_NAME, 'Queued')
        inform("workflow_id: " + workflow_id)

        # Clean the output resource if requested
        if clean_output_resource_first:
            inform("Deleting resource: " + output_resource_name + " for:")
            inform(" project: " + project)
            inform(" subject: " + subject)
            inform(" session: " + session)
            delete_resource.delete_resource(
                username, password, str_utils.get_server_name(server),
                project, subject, session, output_resource_name)

        script_file_start_name = working_directory_name
        script_file_start_name += os.sep + subject
        script_file_start_name += '.' + long_scan_name
        script_file_start_name += '.' + self.PIPELINE_NAME
        script_file_start_name += '.' + project
        script_file_start_name += '.' + session

        # Create script to submit to do the actual work
        work_script_name = script_file_start_name + '.XNAT_PBS_job.sh'
        with contextlib.suppress(FileNotFoundError):
            os.remove(work_script_name)

        work_script = open(work_script_name, 'w')

        nodes_spec = 'nodes=1:ppn=1'
        walltime_spec = 'walltime=' + str(walltime_limit_hours) + ':00:00'
        vmem_spec = 'vmem=' + str(vmem_limit_gbs) + 'gb'

        work_script.write('#PBS -l ' + nodes_spec + ',' + walltime_spec + ',' + vmem_spec + os.linesep)
        work_script.write('#PBS -o ' + working_directory_name + os.linesep)
        work_script.write('#PBS -e ' + working_directory_name + os.linesep)
        work_script.write(os.linesep)
        work_script.write(self.xnat_pbs_jobs_home + os.sep + '7T' + os.sep + 'PostFixHCP7T' + os.sep +
                          'PostFixHCP7T.XNAT.sh \\' + os.linesep)
        work_script.write(' --user="******" \\' + os.linesep)
        work_script.write(' --password="******" \\' + os.linesep)
        work_script.write(' --server="' + str_utils.get_server_name(server) + '" \\' + os.linesep)
        work_script.write(' --project="' + project + '" \\' + os.linesep)
        work_script.write(' --subject="' + subject + '" \\' + os.linesep)
        work_script.write(' --session="' + session + '" \\' + os.linesep)
        work_script.write(' --scan="' + long_scan_name + '" \\' + os.linesep)
        work_script.write(' --working-dir="' + working_directory_name + '" \\' + os.linesep)
        work_script.write(' --workflow-id="' + workflow_id + '" \\' + os.linesep)
        work_script.write(' --setup-script=' + setup_script + os.linesep)

        work_script.close()
        os.chmod(work_script_name, stat.S_IRWXU | stat.S_IRWXG)

        # Create script to put the results into the DB
        put_script_name = script_file_start_name + '.XNAT_PBS_PUT_job.sh'
        self.create_put_script(put_script_name,
                               username, password, put_server,
                               project, subject, session,
                               working_directory_name, output_resource_name,
                               scan_name + '_' + self.PIPELINE_NAME)

        # Submit the job to do the work
        work_submit_cmd = 'qsub ' + work_script_name
        inform("work_submit_cmd: " + work_submit_cmd)

        completed_work_submit_process = subprocess.run(
            work_submit_cmd, shell=True, check=True,
            stdout=subprocess.PIPE, universal_newlines=True)
        work_job_no = str_utils.remove_ending_new_lines(completed_work_submit_process.stdout)
        inform("work_job_no: " + work_job_no)

        # Submit the job to put the results in the DB
        put_submit_cmd = 'qsub -W depend=afterok:' + work_job_no + ' ' + put_script_name
        inform("put_submit_cmd: " + put_submit_cmd)

        completed_put_submit_process = subprocess.run(
            put_submit_cmd, shell=True, check=True,
            stdout=subprocess.PIPE, universal_newlines=True)
        put_job_no = str_utils.remove_ending_new_lines(completed_put_submit_process.stdout)
        inform("put_job_no: " + put_job_no)

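# The loop above constructs a unique working directory name by hand:
# <build_home>/<project>/<PIPELINE_NAME>.<subject>.<scan>.<epoch seconds>,
# sleeping a few seconds first so two submissions cannot collide on the same
# timestamp. A small stand-alone helper capturing that naming convention is
# sketched below; it mirrors the build_working_directory_name() call used by
# the next submitter, but that method's exact signature is an assumption here.

import os
import time


def build_working_directory_name(build_home, project, pipeline_name, subject, scan):
    """Return a timestamped working directory path (hypothetical helper).

    Example result: <build_home>/<project>/<pipeline>.<subject>.<scan>.<seconds>
    """
    # avoid identical names for submissions started within the same second
    time.sleep(5)
    seconds_since_epoch = int(time.time())
    return os.path.join(
        build_home, project,
        pipeline_name + '.' + subject + '.' + scan + '.' + str(seconds_since_epoch))
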
def submit_jobs(self, processing_stage=ProcessingStage.PUT_DATA):
    """
    processing_stage is the last processing stage for which to submit the corresponding job.
    GET_DATA means just get the data.
    PROCESS_DATA means get the data and do the processing.
    PUT_DATA means get the data, process it, and put the results back in the DB.
    """
    logger.debug("submit_jobs processing_stage: " + str(processing_stage))

    if self.validate_parameters():

        # determine what scans to run the RestingStateStats pipeline on for this subject
        # TBD: Does this get run on every scan for which the ICAFIX pipeline has been run,
        #      or does it only get run on every resting state scan that has been fix processed?
        subject_info = hcp7t_subject.Hcp7TSubjectInfo(
            self.project, self.structural_reference_project, self.subject)

        fix_processed_scans = self.archive.available_FIX_processed_names(subject_info)
        fix_processed_scans_set = set(fix_processed_scans)
        logger.debug("fix_processed_scans_set = " + str(fix_processed_scans_set))

        # resting_state_scans = self.archive.available_resting_state_preproc_names(subject_info)
        # resting_state_scans_set = set(resting_state_scans)
        # logger.debug("resting_state_scans_set = " + str(resting_state_scans_set))

        # scans_to_process_set = resting_state_scans_set & fix_processed_scans_set
        scans_to_process_set = fix_processed_scans_set
        scans_to_process = list(scans_to_process_set)
        scans_to_process.sort()
        logger.debug("scans_to_process: " + str(scans_to_process))

        incomplete_scans_to_process = list()
        for scan in scans_to_process:
            if not is_complete(self.archive, subject_info, scan):
                incomplete_scans_to_process.append(scan)
        logger.debug("incomplete_scans_to_process: " + str(incomplete_scans_to_process))
        print("incomplete_scans_to_process:", incomplete_scans_to_process)

        # for scan in scans_to_process:
        for scan in incomplete_scans_to_process:

            logger.info("")
            logger.info("--------------------------------------------------")
            logger.info("Submitting " + self.PIPELINE_NAME + " jobs for")
            logger.info(" Project: " + self.project)
            logger.info(" Subject: " + self.subject)
            logger.info(" Session: " + self.session)
            logger.info(" Structural Reference Project: " + self.structural_reference_project)
            logger.info(" Structural Reference Session: " + self.structural_reference_session)
            logger.info(" Scan: " + scan)
            logger.info(" Stage: " + str(processing_stage))
            logger.info("--------------------------------------------------")

            # make sure working directories do not have the same name based on
            # the same start time by sleeping a few seconds
            time.sleep(5)

            # build the working directory name
            self._working_directory_name = self.build_working_directory_name(
                self.project, self.PIPELINE_NAME, self.subject, scan)
            logger.info("Making working directory: " + self._working_directory_name)
            os.makedirs(name=self._working_directory_name)

            # get JSESSION ID
            jsession_id = xnat_access.get_jsession_id(
                server=os_utils.getenv_required('XNAT_PBS_JOBS_XNAT_SERVER'),
                username=self.username,
                password=self.password)
            logger.info("jsession_id: " + jsession_id)

            # get XNAT Session ID (a.k.a. the experiment ID, e.g. ConnectomeDB_E1234)
            xnat_session_id = xnat_access.get_session_id(
                server=os_utils.getenv_required('XNAT_PBS_JOBS_XNAT_SERVER'),
                username=self.username,
                password=self.password,
                project=self.project,
                subject=self.subject,
                session=self.session)
            logger.info("xnat_session_id: " + xnat_session_id)

            # get XNAT Workflow ID
            workflow_obj = xnat_access.Workflow(
                self.username, self.password,
                'https://' + os_utils.getenv_required('XNAT_PBS_JOBS_XNAT_SERVER'),
                jsession_id)
            self._workflow_id = workflow_obj.create_workflow(
                xnat_session_id, self.project, self.PIPELINE_NAME + '_' + scan, 'Queued')
            logger.info("workflow_id: " + self._workflow_id)

            # determine output resource name
            self._output_resource_name = scan + "_RSS"

            # clean output resource if requested
            if self.clean_output_resource_first:
                logger.info("Deleting resource: " + self._output_resource_name + " for:")
                logger.info(" project: " + self.project)
                logger.info(" subject: " + self.subject)
                logger.info(" session: " + self.session)
                delete_resource.delete_resource(
                    self.username, self.password,
                    str_utils.get_server_name(self.server),
                    self.project, self.subject, self.session,
                    self._output_resource_name)

            # create scripts for various stages of processing
            if processing_stage >= ProcessingStage.PREPARE_SCRIPTS:
                # create script to get data
                self._create_get_data_script(scan)
                # create script to do work
                self._create_work_script(scan)
                # create script to clean data
                self._create_clean_data_script(scan)
                # create script to put the results into the DB
                put_script_name = self._put_data_script_name(scan)
                self.create_put_script(put_script_name,
                                       self.username, self.password, self.put_server,
                                       self.project, self.subject, self.session,
                                       self._working_directory_name,
                                       self._output_resource_name,
                                       self.PIPELINE_NAME + '_' + scan)

            # submit job to get the data
            if processing_stage >= ProcessingStage.GET_DATA:
                get_data_submit_cmd = 'qsub ' + self._get_data_script_name(scan)
                logger.info("get_data_submit_cmd: " + get_data_submit_cmd)

                completed_get_data_submit_process = subprocess.run(
                    get_data_submit_cmd, shell=True, check=True,
                    stdout=subprocess.PIPE, universal_newlines=True)
                get_data_job_no = str_utils.remove_ending_new_lines(
                    completed_get_data_submit_process.stdout)
                logger.info("get_data_job_no: " + get_data_job_no)
            else:
                logger.info("Get data job not submitted")

            # submit job to process the data
            if processing_stage >= ProcessingStage.PROCESS_DATA:
                work_submit_cmd = 'qsub -W depend=afterok:' + get_data_job_no + ' ' + self._work_script_name(scan)
                logger.info("work_submit_cmd: " + work_submit_cmd)

                completed_work_submit_process = subprocess.run(
                    work_submit_cmd, shell=True, check=True,
                    stdout=subprocess.PIPE, universal_newlines=True)
                work_job_no = str_utils.remove_ending_new_lines(
                    completed_work_submit_process.stdout)
                logger.info("work_job_no: " + work_job_no)
            else:
                logger.info("Process data job not submitted")

            # submit job to clean the data
            if processing_stage >= ProcessingStage.CLEAN_DATA:
                clean_submit_cmd = 'qsub -W depend=afterok:' + work_job_no + ' ' + self._clean_data_script_name(scan)
                logger.info("clean_submit_cmd: " + clean_submit_cmd)

                completed_clean_submit_process = subprocess.run(
                    clean_submit_cmd, shell=True, check=True,
                    stdout=subprocess.PIPE, universal_newlines=True)
                clean_job_no = str_utils.remove_ending_new_lines(
                    completed_clean_submit_process.stdout)
                logger.info("clean_job_no: " + clean_job_no)
            else:
                logger.info("Clean data job not submitted")

            # submit job to put the resulting data in the DB
            if processing_stage >= ProcessingStage.PUT_DATA:
                put_submit_cmd = 'qsub -W depend=afterok:' + clean_job_no + ' ' + put_script_name
                logger.info("put_submit_cmd: " + put_submit_cmd)

                completed_put_submit_process = subprocess.run(
                    put_submit_cmd, shell=True, check=True,
                    stdout=subprocess.PIPE, universal_newlines=True)
                put_job_no = str_utils.remove_ending_new_lines(
                    completed_put_submit_process.stdout)
                logger.info("put_job_no: " + put_job_no)
            else:
                logger.info("Put data job not submitted")

    else:
        logger.info("Unable to submit jobs")

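# The submit_jobs variant above gates each step with ">=" comparisons against
# ProcessingStage values, so ProcessingStage must be an ordered enumeration in
# which each stage implies all earlier ones. Its real definition is not shown
# here; a minimal sketch consistent with the comparisons used above (the
# ordering and the use of IntEnum are assumptions) would be:

from enum import IntEnum, auto


class ProcessingStage(IntEnum):
    """Hypothetical ordered processing stages; later stages include earlier ones."""
    PREPARE_SCRIPTS = auto()
    GET_DATA = auto()
    PROCESS_DATA = auto()
    CLEAN_DATA = auto()
    PUT_DATA = auto()


# Example: submit everything up to and including the processing job, but do
# not clean or put the results back into the database:
#
#   submitter.submit_jobs(processing_stage=ProcessingStage.PROCESS_DATA)
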
def submit_jobs(self):
    _debug("submit_jobs")

    if self.validate_parameters():

        # make sure working directories don't have the same name based on the same
        # start time by sleeping a few seconds
        time.sleep(5)
        current_seconds_since_epoch = int(time.time())

        working_directory_name = self.build_home
        working_directory_name += os.sep + self.project
        working_directory_name += os.sep + self.PIPELINE_NAME
        working_directory_name += '.' + self.subject
        working_directory_name += '.' + str(current_seconds_since_epoch)

        # make the working directory
        _inform("Making working directory: " + working_directory_name)
        os.makedirs(name=working_directory_name)

        # get JSESSION ID
        jsession_id = xnat_access.get_jsession_id(
            server=os_utils.getenv_required('XNAT_PBS_JOBS_XNAT_SERVER'),
            username=self.username,
            password=self.password)
        _inform("jsession_id: " + jsession_id)

        # get XNAT Session ID (a.k.a. the experiment ID, e.g. ConnectomeDB_E1234)
        xnat_session_id = xnat_access.get_session_id(
            server=os_utils.getenv_required('XNAT_PBS_JOBS_XNAT_SERVER'),
            username=self.username,
            password=self.password,
            project=self.project,
            subject=self.subject,
            session=self.session)
        _inform("xnat_session_id: " + xnat_session_id)

        # get XNAT Workflow ID
        workflow_obj = xnat_access.Workflow(
            self.username, self.password, self.server, jsession_id)
        workflow_id = workflow_obj.create_workflow(
            xnat_session_id, self.project, self.PIPELINE_NAME, 'Queued')
        _inform("workflow_id: " + workflow_id)

        # Determine the output resource name
        output_resource_name = self.archive.DEDRIFT_AND_RESAMPLE_RESOURCE_NAME
        _inform("output_resource_name: " + output_resource_name)

        # Clean the output resource if requested
        if self.clean_output_resource_first:
            _inform("Deleting resource: " + output_resource_name + " for:")
            _inform(" project: " + self.project)
            _inform(" subject: " + self.subject)
            _inform(" session: " + self.session)
            delete_resource.delete_resource(
                self.username, self.password,
                str_utils.get_server_name(self.server),
                self.project, self.subject, self.session,
                output_resource_name, True)

        script_file_start_name = working_directory_name
        script_file_start_name += os.sep + self.subject
        script_file_start_name += '.' + self.PIPELINE_NAME
        script_file_start_name += '.' + self.project
        script_file_start_name += '.' + self.session

        # Create script to submit to do the actual work
        work_script_name = script_file_start_name + '.XNAT_PBS_job.sh'
        with contextlib.suppress(FileNotFoundError):
            os.remove(work_script_name)

        work_script = open(work_script_name, 'w')

        nodes_spec = 'nodes=1:ppn=1'
        walltime_spec = 'walltime=' + str(self.walltime_limit_hours) + ':00:00'
        vmem_spec = 'vmem=' + str(self.vmem_limit_gbs) + 'gb'
        mem_spec = 'mem=' + str(self.mem_limit_gbs) + 'gb'

        work_script.write('#PBS -l ' + nodes_spec + ',' + walltime_spec + ',' +
                          vmem_spec + ',' + mem_spec + os.linesep)
        # work_script.write('#PBS -q HCPput' + os.linesep)
        work_script.write('#PBS -o ' + working_directory_name + os.linesep)
        work_script.write('#PBS -e ' + working_directory_name + os.linesep)
        work_script.write(os.linesep)
        work_script.write(self.xnat_pbs_jobs_home + os.sep + '7T' + os.sep + 'DeDriftAndResampleHCP7T' + os.sep +
                          'DeDriftAndResampleHCP7T.XNAT.sh \\' + os.linesep)
        work_script.write(' --user="******" \\' + os.linesep)
        work_script.write(' --password="******" \\' + os.linesep)
        work_script.write(' --server="' + str_utils.get_server_name(self.server) + '" \\' + os.linesep)
        work_script.write(' --project="' + self.project + '" \\' + os.linesep)
        work_script.write(' --subject="' + self.subject + '" \\' + os.linesep)
        work_script.write(' --session="' + self.session + '" \\' + os.linesep)
        work_script.write(' --structural-reference-project="' + self.structural_reference_project + '" \\' + os.linesep)
        work_script.write(' --structural-reference-session="' + self.structural_reference_session + '" \\' + os.linesep)
        work_script.write(' --working-dir="' + working_directory_name + '" \\' + os.linesep)
        work_script.write(' --workflow-id="' + workflow_id + '" \\' + os.linesep)
        # work_script.write(' --keep-all' + ' \\' + os.linesep)
        # work_script.write(' --prevent-push' + ' \\' + os.linesep)
        work_script.write(' --setup-script=' + self.setup_script + os.linesep)

        work_script.close()
        os.chmod(work_script_name, stat.S_IRWXU | stat.S_IRWXG)

        # Create script to put the results into the DB
        put_script_name = script_file_start_name + '.XNAT_PBS_PUT_job.sh'
        self.create_put_script(put_script_name,
                               self.username, self.password, self.put_server,
                               self.project, self.subject, self.session,
                               working_directory_name, output_resource_name,
                               self.PIPELINE_NAME)

        # Submit the job to do the work
        work_submit_cmd = 'qsub ' + work_script_name
        _inform("work_submit_cmd: " + work_submit_cmd)

        completed_work_submit_process = subprocess.run(
            work_submit_cmd, shell=True, check=True,
            stdout=subprocess.PIPE, universal_newlines=True)
        work_job_no = str_utils.remove_ending_new_lines(completed_work_submit_process.stdout)
        _inform("work_job_no: " + work_job_no)

        # Submit the job to put the results in the DB
        put_submit_cmd = 'qsub -W depend=afterok:' + work_job_no + ' ' + put_script_name
        _inform("put_submit_cmd: " + put_submit_cmd)

        completed_put_submit_process = subprocess.run(
            put_submit_cmd, shell=True, check=True,
            stdout=subprocess.PIPE, universal_newlines=True)
        put_job_no = str_utils.remove_ending_new_lines(completed_put_submit_process.stdout)
        _inform("put_job_no: " + put_job_no)

    else:
        _inform("Unable to submit jobs")

def submit_jobs(self):
    _debug("submit_jobs")

    if self.validate_parameters():

        _inform("")
        _inform("--------------------------------------------------")
        _inform("Submitting " + self.PIPELINE_NAME + " jobs for")
        _inform(" Project: " + self.project)
        _inform(" Subject: " + self.subject)
        _inform(" Session: " + self.session)
        _inform("--------------------------------------------------")

        # make sure working directories don't have the same name based on
        # the same start time by sleeping a few seconds
        time.sleep(5)
        current_seconds_since_epoch = int(time.time())

        # build the working directory name
        self._working_directory_name = self.build_home
        self._working_directory_name += os.sep + self.project
        self._working_directory_name += os.sep + self.PIPELINE_NAME
        self._working_directory_name += '.' + self.subject
        self._working_directory_name += '.' + str(current_seconds_since_epoch)

        # make the working directory
        _inform("making working directory: " + self._working_directory_name)
        os.makedirs(name=self._working_directory_name)

        # get JSESSION ID
        jsession_id = xnat_access.get_jsession_id(
            server=os_utils.getenv_required('XNAT_PBS_JOBS_XNAT_SERVER'),
            username=self.username,
            password=self.password)
        _inform("jsession_id: " + jsession_id)

        # get XNAT Session ID (a.k.a. the experiment ID, e.g. ConnectomeDB_E1234)
        xnat_session_id = xnat_access.get_session_id(
            server=os_utils.getenv_required('XNAT_PBS_JOBS_XNAT_SERVER'),
            username=self.username,
            password=self.password,
            project=self.project,
            subject=self.subject,
            session=self.session)
        _inform("xnat_session_id: " + xnat_session_id)

        # get XNAT Workflow ID
        workflow_obj = xnat_access.Workflow(
            self.username, self.password,
            os_utils.getenv_required('XNAT_PBS_JOBS_XNAT_SERVER'),
            jsession_id)
        self._workflow_id = workflow_obj.create_workflow(
            xnat_session_id, self.project, self.PIPELINE_NAME, 'Queued')
        _inform("workflow_id: " + self._workflow_id)

        # determine output resource name
        self._output_resource_name = 'Diffusion_preproc'

        # clean the output resource if requested
        if self.clean_output_resource_first:
            _inform("Deleting resource: " + self._output_resource_name + " for:")
            _inform(" project: " + self.project)
            _inform(" subject: " + self.subject)
            _inform(" session: " + self.session)
            delete_resource.delete_resource(
                self.username, self.password,
                str_utils.get_server_name(self.server),
                self.project, self.subject, self.session,
                self._output_resource_name)

        # create script to do the PreEddy work
        self._create_pre_eddy_script()

        # create script to do the Eddy work
        self._create_eddy_script()

        # create script to do the PostEddy work
        self._create_post_eddy_script()

        # create script to put the results into the DB
        put_script_name = self._get_scripts_start_name() + '.XNAT_PBS_PUT_job.sh'
        self.create_put_script(put_script_name,
                               self.username, self.password, self.put_server,
                               self.project, self.subject, self.session,
                               self._working_directory_name,
                               self._output_resource_name,
                               self.PIPELINE_NAME)

        # Submit the job to do the Pre-Eddy work
        pre_eddy_submit_cmd = 'qsub ' + self._pre_eddy_script_name
        _inform("pre_eddy_submit_cmd: " + pre_eddy_submit_cmd)

        completed_pre_eddy_submit_process = subprocess.run(
            pre_eddy_submit_cmd, shell=True, check=True,
            stdout=subprocess.PIPE, universal_newlines=True)
        pre_eddy_job_no = str_utils.remove_ending_new_lines(
            completed_pre_eddy_submit_process.stdout)
        _inform("pre_eddy_job_no: " + pre_eddy_job_no)

        # Submit the job to do the Eddy work
        eddy_submit_cmd = 'qsub -W depend=afterok:' + pre_eddy_job_no + ' ' + self._eddy_script_name
        _inform("eddy_submit_cmd: " + eddy_submit_cmd)

        completed_eddy_submit_process = subprocess.run(
            eddy_submit_cmd, shell=True, check=True,
            stdout=subprocess.PIPE, universal_newlines=True)
        eddy_job_no = str_utils.remove_ending_new_lines(
            completed_eddy_submit_process.stdout)
        _inform("eddy_job_no: " + eddy_job_no)

        # Submit the job to do the Post-Eddy work
        post_eddy_submit_cmd = 'qsub -W depend=afterok:' + eddy_job_no + ' ' + self._post_eddy_script_name
        _inform("post_eddy_submit_cmd: " + post_eddy_submit_cmd)

        completed_post_eddy_submit_process = subprocess.run(
            post_eddy_submit_cmd, shell=True, check=True,
            stdout=subprocess.PIPE, universal_newlines=True)
        post_eddy_job_no = str_utils.remove_ending_new_lines(
            completed_post_eddy_submit_process.stdout)
        _inform("post_eddy_job_no: " + post_eddy_job_no)

        # Submit the job to put the results in the DB
        put_submit_cmd = 'qsub -W depend=afterok:' + post_eddy_job_no + ' ' + put_script_name
        _inform("put_submit_cmd: " + put_submit_cmd)

        completed_put_submit_process = subprocess.run(
            put_submit_cmd, shell=True, check=True,
            stdout=subprocess.PIPE, universal_newlines=True)
        put_job_no = str_utils.remove_ending_new_lines(
            completed_put_submit_process.stdout)
        _inform("put_job_no: " + put_job_no)

    else:
        _inform("Unable to submit jobs")

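# All four submitters above repeat the same PBS pattern: run "qsub" through
# subprocess, capture the job number printed on stdout, and submit the next
# script with "-W depend=afterok:<job_no>" so it starts only if the previous
# job exits successfully. A minimal, self-contained sketch of that pattern
# follows; the helper name is hypothetical and plain str.strip() stands in
# for str_utils.remove_ending_new_lines().

import subprocess


def submit_pbs_job(script_name, after_job_no=None):
    """Submit a PBS script, optionally dependent on a prior job; return its job number."""
    cmd = 'qsub '
    if after_job_no:
        cmd += '-W depend=afterok:' + after_job_no + ' '
    cmd += script_name
    completed = subprocess.run(
        cmd, shell=True, check=True,
        stdout=subprocess.PIPE, universal_newlines=True)
    return completed.stdout.strip()


# Example chain mirroring the Pre-Eddy -> Eddy -> Post-Eddy -> PUT ordering above:
#
#   pre_eddy_job_no = submit_pbs_job(pre_eddy_script_name)
#   eddy_job_no = submit_pbs_job(eddy_script_name, after_job_no=pre_eddy_job_no)
#   post_eddy_job_no = submit_pbs_job(post_eddy_script_name, after_job_no=eddy_job_no)
#   submit_pbs_job(put_script_name, after_job_no=post_eddy_job_no)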