def addMkDirJob(workflow=None, mkdir=None, outputDir=None, namespace=None, version=None,
                parentJobLs=None, extraDependentInputLs=None):
    """
    2012.10.2 increment workflow.no_of_jobs
    2012.9.11 make sure that parentJobLs and extraDependentInputLs are not None.
    2012.3.10 add arguments parentJobLs, extraDependentInputLs
    2011-11-28 get namespace and version from workflow first
    2011-9-14
    """
    # Add a mkdir job for any directory.
    job = Job(namespace=getattr(workflow, 'namespace', namespace), name=mkdir.name,
              version=getattr(workflow, 'version', version))
    job.addArguments(outputDir)
    job.folder = outputDir  # custom attribute
    job.output = outputDir  # custom attribute
    workflow.addJob(job)
    if parentJobLs:
        for parentJob in parentJobLs:
            if parentJob:
                workflow.depends(parent=parentJob, child=job)
    if extraDependentInputLs:
        for inputFile in extraDependentInputLs:
            if inputFile is not None:
                job.uses(inputFile, transfer=True, register=True, link=Link.INPUT)
    if hasattr(workflow, 'no_of_jobs'):  # 2012.10.2
        workflow.no_of_jobs += 1
    return job
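# --- Usage sketch (hypothetical, not from the original module) ---
# A minimal sketch of how addMkDirJob might be wired up, assuming a Pegasus.DAX3
# ADAG onto which the caller has attached the custom namespace/version/no_of_jobs
# attributes read above, and a "mkdirwrap" executable registered in the
# transformation catalog. All names below are illustrative.
from Pegasus.DAX3 import ADAG, Executable

workflow = ADAG("example")
workflow.namespace = "workflow"   # custom attribute read via getattr() above
workflow.version = "1.0"          # custom attribute read via getattr() above
workflow.no_of_jobs = 0           # custom job counter incremented above
mkdirExecutable = Executable(namespace="workflow", name="mkdirwrap", version="1.0")
mkdirJob = addMkDirJob(workflow=workflow, mkdir=mkdirExecutable, outputDir="output/call")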
def add_vol_surf_snapshot_step(self, dax, jobs_before, vol, surfs):
    snapshot_file_1 = File("snapshot_sagittal_%d.png" % self.SNAPSHOT_NUMBER)
    snapshot_file_2 = File("snapshot_coronal_%d.png" % self.SNAPSHOT_NUMBER)
    snapshot_file_3 = File("snapshot_axial_%d.png" % self.SNAPSHOT_NUMBER)

    job = Job("qc_snapshot")
    job.addArguments(str(self.SNAPSHOT_NUMBER), "vol_surf", vol)
    for surf in surfs:
        job.addArguments(surf)
    job.uses(vol, link=Link.INPUT)
    for surf in surfs:
        job.uses(surf, link=Link.INPUT)
    job.uses(snapshot_file_1, link=Link.OUTPUT, transfer=True, register=False)
    job.uses(snapshot_file_2, link=Link.OUTPUT, transfer=True, register=False)
    job.uses(snapshot_file_3, link=Link.OUTPUT, transfer=True, register=False)
    dax.addJob(job)

    for job_before in jobs_before:
        dax.depends(job, job_before)

    self.SNAPSHOT_NUMBER += 1
def _pre_filter_fastq(self, index, suffix_len):
    pre_filter = Job(name='pre_filter_fastq.py')
    pre_filter.invoke('all', self._state_update % 'Pre-filter reads file part %d' % (index + 1))
    prefix = 'reads%d' % index

    # Inputs
    reads = File(('x%0' + str(suffix_len) + 'd') % index)

    # Outputs
    full_fastq = File('%s_full.fastq' % prefix)
    reject = File('%s_reject.fastq' % prefix)
    stats = File('%s.stats' % prefix)

    # Arguments
    trims = ','.join([str(i) for i in self._trims])
    # Default to '0' (no trimming) when the trim list is empty; joining an empty
    # list yields '', so the original "== ','" guard could never trigger.
    trims = '0' if trims == '' else trims
    pre_filter.addArguments(reads, '-r', '%d' % self._read_length, '-t', '%s' % trims)
    pre_filter.addArguments('-p', prefix)

    # Uses
    pre_filter.uses(reads, link=Link.INPUT)
    for t in self._trims:
        fastq_t = File('%s_%d.fastq' % (prefix, t))
        pre_filter.uses(fastq_t, link=Link.OUTPUT, transfer=False, register=False)
    pre_filter.uses(full_fastq, link=Link.OUTPUT, transfer=False, register=False)
    pre_filter.uses(reject, link=Link.OUTPUT, transfer=False, register=False)
    pre_filter.uses(stats, link=Link.OUTPUT, transfer=False, register=False)

    self.adag.addJob(pre_filter)
def _perm_index(self, index_type, read_length, read_format='fastq', seed='F2'):
    perm_index = Job(name='perm')
    perm_index.invoke('all', self._state_update % 'Pre-computing %s index file' % index_type.capitalize())
    prefix = self._get_index_hash(read_length)

    # Input files
    fa_input = File('h%s/%s.fa' % (prefix, index_type))

    # Output files
    hash_v = self._get_index_hash(read_length, seed)
    index = File('h%d_%s_%s_%s.index' % (hash_v, index_type, seed, read_length))

    # Arguments
    perm_index.addArguments(fa_input, '%d' % read_length, '--readFormat', read_format, '--seed', seed)
    perm_index.addArguments('-s', index)

    # Uses
    perm_index.uses(fa_input, link=Link.INPUT)
    # Save this file
    perm_index.uses(index, link=Link.OUTPUT, transfer=True, register=True)

    self.adag.addJob(perm_index)
    return perm_index
def generate_bem_surfaces(self, dax, job_recon):
    brain_surface = File(HeadModelFiles.BRAIN_SURFACE.value % self.subject)
    inner_skull_surface = File(HeadModelFiles.INNER_SKULL_SURFACE.value % self.subject)
    outer_skin_surface = File(HeadModelFiles.OUTER_SKIN_SURFACE.value % self.subject)
    outer_skull_surface = File(HeadModelFiles.OUTER_SKULL_SURFACE.value % self.subject)
    bem_surfs = [brain_surface, inner_skull_surface, outer_skin_surface, outer_skull_surface]

    job1 = Job(HeadModelJobNames.MNE_WATERSHED_BEM.value)
    job1.addArguments(self.subject)
    for surf in bem_surfs:
        job1.uses(surf, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job1)
    dax.depends(job1, job_recon)

    brain_surface_centered = File(HeadModelFiles.BRAIN_SURFACE_CENTERED.value % self.subject)
    inner_skull_surface_centered = File(HeadModelFiles.INNER_SKULL_SURFACE_CENTERED.value % self.subject)
    outer_skin_surface_centered = File(HeadModelFiles.OUTER_SKIN_SURFACE_CENTERED.value % self.subject)
    outer_skull_surface_centered = File(HeadModelFiles.OUTER_SKULL_SURFACE_CENTERED.value % self.subject)
    centered_bem_surfs = [brain_surface_centered, inner_skull_surface_centered,
                          outer_skin_surface_centered, outer_skull_surface_centered]

    last_job = job1
    for i, surf in enumerate(bem_surfs):
        job2 = Job(T1JobNames.MRIS_CONVERT.value)
        job2.addArguments("--to-scanner", surf, centered_bem_surfs[i])
        job2.uses(surf, link=Link.INPUT)
        job2.uses(centered_bem_surfs[i], link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job2)
        dax.depends(job2, job1)
        last_job = job2

    brain_surface_zip = File(HeadModelFiles.BRAIN_SURFACE_ZIP.value % self.subject)
    inner_skull_surface_zip = File(HeadModelFiles.INNER_SKULL_SURFACE_ZIP.value % self.subject)
    outer_skin_surface_zip = File(HeadModelFiles.OUTER_SKIN_SURFACE_ZIP.value % self.subject)
    outer_skull_surface_zip = File(HeadModelFiles.OUTER_SKULL_SURFACE_ZIP.value % self.subject)
    zip_bem_surfaces = [brain_surface_zip, inner_skull_surface_zip,
                        outer_skin_surface_zip, outer_skull_surface_zip]

    # TODO: add vox2ras.txt inside zip
    for i, centered_surf in enumerate(centered_bem_surfs):
        job3 = Job(HeadModelJobNames.GEN_SURFACE_ZIP.value)
        job3.addArguments(centered_surf, zip_bem_surfaces[i], self.subject)
        job3.uses(centered_surf, link=Link.INPUT)
        job3.uses(zip_bem_surfaces[i], link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job3)
        dax.depends(job3, last_job)

    return last_job
def _step_recon_all(main_dax, previous_job, main_config, subtype_config):
    LOGGER.info("Adding steps for image type: " + subtype_config.prefix.upper())
    if subtype_config.is_dicom:
        LOGGER.info("DICOM identified for " + subtype_config.prefix.upper())
        job1 = Job(name="mri_convert",
                   node_label="DICOM input pre-processing for " + subtype_config.prefix.upper())
        job1.addArguments(subtype_config.folder, subtype_config.raw_nii_file,
                          "--out_orientation", "RAS", "-rt", "nearest")
        job1.uses(subtype_config.folder, link=Link.INPUT)
        job1.uses(subtype_config.raw_nii_file, link=Link.OUTPUT, transfer=True, register=False)
        subtype_config.main_data = subtype_config.raw_nii_file
        main_dax.addJob(job1)
        if previous_job is not None:
            LOGGER.debug("Job dependency %s - %s" % (job1, previous_job))
            main_dax.depends(job1, previous_job)
        previous_job = job1

    job2 = Job(name="recon-all", node_label="Call 'recon-all' for " + subtype_config.prefix.upper())
    if subtype_config.prefix == SubtypeConfiguration.T1:
        LOGGER.debug("T1 branch..")
        job2.addArguments("-s", main_config.subject_name, "-i", subtype_config.main_data,
                          "-all", "-parallel", "-openmp", main_config.number_of_threads)
        # TODO see if these can work for implicit job dependency generation
        job2.uses(main_config.mri.aseg_mgz_file, link=Link.OUTPUT, transfer=True)
        job2.uses(main_config.mri.t1_mgz_file, link=Link.OUTPUT, transfer=True)
    else:
        LOGGER.debug("recon-all steps for " + subtype_config.prefix)
        job2.addArguments("-s", main_config.subject_name,
                          "-" + subtype_config.prefix.upper(), subtype_config.main_data,
                          "-" + subtype_config.prefix.upper() + "pial",
                          "-autorecon3", "-parallel", "-openmp", main_config.number_of_threads)
        job2.uses(subtype_config.main_data, link=Link.INPUT)
    main_dax.addJob(job2)
    if previous_job is not None:
        LOGGER.debug("Job dependency %s - %s" % (job2, previous_job))
        main_dax.depends(job2, previous_job)
    return job2
def _ensure_input_format(self, file_format, input_name, output_name, dax):
    input_file = File(input_name)
    output_file = None
    job = None

    if file_format == "dicom":
        output_file = File(output_name)
        job = Job(T1JobNames.MRI_CONVERT.value)
        job.addArguments("-it", "dicom", input_file, output_file)
        job.uses(input_file, link=Link.INPUT)
        job.uses(output_file, link=Link.OUTPUT, transfer=False, register=False)
        dax.addJob(job)

    if output_file is None:
        output_file = input_file

    return output_file, job
def _bar_plot(self):
    bar_plot = Job(name='bar_plot')
    bar_plot.invoke('all', self._state_update % 'Plot summary out file')

    # Input files
    summary_file = File('%s.summary.out' % self._prefix)

    # Output files
    pdf_file = File('%s.ps' % self._prefix)

    # Arguments
    bar_plot.addArguments('--output-file', pdf_file, summary_file)

    # Uses
    bar_plot.uses(summary_file, link=Link.INPUT)
    bar_plot.uses(pdf_file, link=Link.OUTPUT, transfer=True, register=False)

    self.adag.addJob(bar_plot)
def _farish_compact(self):
    farish_compact = Job(name='farish_compact')
    farish_compact.invoke('all', self._state_update % 'Farish Compact')

    # Input files
    unmapped = File('%s.unmapped.fastq' % self._prefix)

    # Output files
    compact = File('%s.compact' % self._prefix)

    # Arguments
    farish_compact.addArguments(unmapped, '-o', compact)

    # Uses
    farish_compact.uses(unmapped, link=Link.INPUT)
    farish_compact.uses(compact, link=Link.OUTPUT, transfer=True, register=False)

    self.adag.addJob(farish_compact)
def _fastq_split(self, splits=2, suffix_len=2):
    fastq_split = Job(name='fastq-split')
    fastq_split.invoke('all', self._state_update % 'Splitting input reads file into %d parts' % splits)

    # Inputs
    reads = File(self._reads)

    # Arguments
    fastq_split.addArguments(reads, '%d' % splits)

    # Uses
    fastq_split.uses(reads, link=Link.INPUT)
    for i in range(splits):
        split_i = File(('x%0' + str(suffix_len) + 'd') % i)
        # Outputs
        fastq_split.uses(split_i, link=Link.OUTPUT, transfer=False, register=False)

    self.adag.addJob(fastq_split)
def cat(inputs, output, o_link=Link.OUTPUT, o_transfer=False, o_register=False):
    cat = Job(name='merge')

    # Outputs
    output = File(output)

    for input_file in inputs:
        # Inputs
        input_file = File(input_file)
        # Arguments
        cat.addArguments(input_file)
        # Uses
        cat.uses(input_file, link=Link.INPUT)

    cat.setStdout(output)
    cat.uses(output, link=o_link, transfer=o_transfer, register=o_register)
    return cat
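# --- Usage sketch (hypothetical, not from the original module) ---
# A minimal sketch of wiring the cat() helper into a DAX: merge two part files
# into one transferred output. The producer jobs referenced in the comment are
# assumed to exist elsewhere in the workflow.
from Pegasus.DAX3 import ADAG

adag = ADAG("merge-example")
merge_job = cat(['part0.txt', 'part1.txt'], 'merged.txt', o_transfer=True)
adag.addJob(merge_job)
# Run the merge only after its inputs exist, e.g.:
# adag.depends(child=merge_job, parent=producer_job)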
def _merge_stats(self):
    merge_stats = Job(name='merge-stats')
    merge_stats.invoke('all', self._state_update % 'Merging adaptor stats file')

    # Outputs
    adaptor_stats = File('%s.adaptor.stats' % self._prefix)

    # Arguments
    merge_stats.addArguments('reads*.stats', adaptor_stats)

    for i in range(self._splits):
        # Inputs
        stats_i = File('reads%d.stats' % i)
        # Uses
        merge_stats.uses(stats_i, link=Link.INPUT)

    # Outputs
    merge_stats.uses(adaptor_stats, link=Link.OUTPUT, transfer=True, register=False)

    self.adag.addJob(merge_stats)
def _parse_clipped_alignment(self, input_file):
    parse_clipped_alignment = Job(name='parse_clipped_alignment')
    parse_clipped_alignment.invoke('all', self._state_update % 'Parse clipped alignment')

    # Input files
    input_file = File(input_file)

    # Output files
    info = File('%s.info' % input_file.name)
    self._info_files.append(info.name)

    # Arguments
    parse_clipped_alignment.addArguments(input_file)
    parse_clipped_alignment.setStdout(info)

    # Uses
    parse_clipped_alignment.uses(input_file, link=Link.INPUT)
    parse_clipped_alignment.uses(info, link=Link.OUTPUT, transfer=False, register=False)

    self.adag.addJob(parse_clipped_alignment)
def _parse_alignment(self, input_file, tag):
    parse_alignment = Job(name='parse_alignment')
    parse_alignment.invoke('all', self._state_update % 'Parse alignment')

    # Input files
    input_file = File(input_file)

    # Output files
    vis = File('%s.vis' % input_file.name)
    self._vis_files.append(vis.name)

    # Arguments
    parse_alignment.addArguments(input_file, '--strandRule', self._strand_rule, '--tag', tag)
    parse_alignment.setStdout(vis)

    # Uses
    parse_alignment.uses(input_file, link=Link.INPUT)
    parse_alignment.uses(vis, link=Link.OUTPUT, transfer=False, register=False)

    self.adag.addJob(parse_alignment)
def steps_recon_all(main_dax, config):
    last_job = _step_recon_all(main_dax, None, config, config.t1)
    return_job = last_job

    if config.t2.not_empty:
        last_job = _step_recon_all(main_dax, last_job, config, config.t2)
    if config.flair.not_empty:
        last_job = _step_recon_all(main_dax, last_job, config, config.flair)

    mri_job = Job(name="mri_convert",
                  node_label="Generate APARC-ASEG nifti file with good orientation")
    mri_job.addArguments(config.mri.aseg_mgz_file, config.mri.aseg_nii_file,
                         "--out_orientation", "RAS", "-rt", "nearest")
    mri_job.uses(config.mri.aseg_mgz_file, link=Link.INPUT)
    mri_job.uses(config.mri.aseg_nii_file, link=Link.OUTPUT, transfer=True, register=False)
    main_dax.addJob(mri_job)
    main_dax.depends(mri_job, last_job)

    return return_job
def addReadCountJob(self, workflow, VariousReadCountJava=None, GenomeAnalysisTKJar=None,
                    refFastaFList=None, bamF=None, baiF=None, readCountOutputF=None,
                    parentJobLs=None, job_max_memory=1000, extraArguments="",
                    transferOutput=False):
    """
    2011-11-25
    """
    # Avoid a mutable default argument for parentJobLs.
    if parentJobLs is None:
        parentJobLs = []
    javaMemRequirement = "-Xms128m -Xmx%sm" % job_max_memory
    refFastaF = refFastaFList[0]
    job = Job(namespace=workflow.namespace, name=VariousReadCountJava.name, version=workflow.version)
    job.addArguments(javaMemRequirement, '-jar', GenomeAnalysisTKJar, "-T", "VariousReadCount",
                     '-R', refFastaF, '-o', readCountOutputF, "-mmq 30")
    job.addArguments("-I", bamF)
    if extraArguments:
        job.addArguments(extraArguments)
    self.addJobUse(job, file=GenomeAnalysisTKJar, transfer=True, register=True, link=Link.INPUT)
    job.uses(bamF, transfer=True, register=True, link=Link.INPUT)
    job.uses(baiF, transfer=True, register=True, link=Link.INPUT)
    self.registerFilesAsInputToJob(job, refFastaFList)
    job.output = readCountOutputF
    job.uses(readCountOutputF, transfer=transferOutput, register=True, link=Link.OUTPUT)
    workflow.addJob(job)
    yh_pegasus.setJobProperRequirement(job, job_max_memory=job_max_memory)
    for parentJob in parentJobLs:
        workflow.depends(parent=parentJob, child=job)
    """
    #2013.3.24 should use this
    job = self.addGATKJob(self, workflow=None, executable=None, GenomeAnalysisTKJar=None, GATKAnalysisType=None,
        inputFile=None, inputArgumentOption=None, refFastaFList=None, inputFileList=None,
        argumentForEachFileInInputFileList=None,
        interval=None, outputFile=None,
        parentJobLs=None, transferOutput=True, job_max_memory=2000,
        frontArgumentList=None, extraArguments=None, extraArgumentList=None, extraOutputLs=None,
        extraDependentInputLs=None, no_of_cpus=None, walltime=120, **keywords)
    """
    return job
def _merge_info(self, info_files, gtf_file):
    merge_info = Job(name='merge-info')
    merge_info.invoke('all', self._state_update % 'Merging info files to generate GTF file')

    # Outputs
    gtf_file = File(gtf_file)

    for info_file in info_files:
        # Inputs
        info_i = File(info_file)
        # Arguments
        merge_info.addArguments(info_i)
        # Uses
        merge_info.uses(info_i, link=Link.INPUT)

    # Arguments
    merge_info.addArguments(gtf_file)

    # Outputs
    merge_info.uses(gtf_file, link=Link.OUTPUT, transfer=True, register=False)

    self.adag.addJob(merge_info)
def add_projection_computation_steps(self, dax, job_mapping_details):
    projection_mat = File(ProjectionCompFiles.PROJECTION_MAT.value % (self.sensors_type, self.atlas_suffix))
    sensor_positions = File(ProjectionCompFiles.SENS_POSITIONS.value % self.sensors_type)
    centers_txt = File(AsegFiles.CENTERS_TXT.value % self.atlas_suffix)

    job = Job(ProjectionCompJobNames.COMPUTE_PROJ_MAT.value)
    job.addArguments(sensor_positions, centers_txt, projection_mat, self.subject)
    job.uses(sensor_positions, link=Link.INPUT)
    job.uses(centers_txt, link=Link.INPUT)
    job.uses(projection_mat, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job)

    dax.depends(job, job_mapping_details)
def add_seeg_gain_dp_computation_steps(self, dax, job_seeg_xyz, job_mapping_details):
    seeg_xyz = File(SEEGCompFiles.SEEG_XYZ.value)
    centers_txt = File(AsegFiles.CENTERS_TXT.value % self.atlas_suffix)
    gain_mat = File(SeegGainFiles.SEEG_GAIN_DP_MAT.value % self.atlas_suffix)

    job = Job(ProjectionCompJobNames.COMPUTE_PROJ_MAT.value)
    job.addArguments(seeg_xyz, centers_txt, gain_mat, self.subject)
    job.uses(seeg_xyz, link=Link.INPUT)
    job.uses(centers_txt, link=Link.INPUT)
    job.uses(gain_mat, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job)

    dax.depends(job, job_seeg_xyz)
    dax.depends(job, job_mapping_details)
def generate_job(self):
    job = Job(self.data["name"], node_label=self.data["name"])

    # Outputs parsing
    for output in self.data["outputs"]:
        if output["command-line-key"]:
            if "value-template" in output and output["value-template"]:
                self.data["command-line"] = self.data["command-line"].replace(
                    output["command-line-key"], output["value-template"])
            else:
                self.data["command-line"] = self.data["command-line"].replace(
                    output["command-line-key"], self.map[output["name"]])
        if output["type"] == "File":
            job.uses(self.map[output["name"]], link=Link.OUTPUT, transfer=self.transfer)

    # Inputs parsing
    inputsMap = {}
    for input in self.data["inputs"]:
        if input["command-line-key"]:
            self.data["command-line"] = self.data["command-line"].replace(
                input["command-line-key"], self.map[input["name"]])
            inputsMap[input["command-line-key"]] = self.map[input["name"]]
        if input["type"] == "File":
            job.uses(self.map[input["name"]], link=Link.INPUT)

    # Outputs value-template parsing
    for output in self.data["outputs"]:
        if "value-template" in output and output["value-template"]:
            for input in inputsMap:
                if input in output["value-template"]:
                    value = output["value-template"].replace(input, inputsMap[input])
                    job.uses(value, link=Link.OUTPUT, transfer=self.transfer)
                    break

    self.create_wrapper()
    return job
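# --- Data-shape sketch (hypothetical, for illustration only) ---
# generate_job() reads a Boutiques-style tool descriptor from self.data and a
# name -> concrete-filename mapping from self.map. The field values below are
# invented to show the shapes the method expects; they are not from the original.
example_data = {
    "name": "bet",
    "command-line": "bet [INPUT] [OUTPUT]",
    "inputs": [{"name": "infile", "command-line-key": "[INPUT]", "type": "File"}],
    "outputs": [{"name": "outfile", "command-line-key": "[OUTPUT]",
                 "value-template": "[INPUT]_brain.nii.gz", "type": "File"}],
}
example_map = {"infile": "t1.nii.gz", "outfile": "t1_brain.nii.gz"}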
def _transcript_prediction(self):
    transcript_prediction = Job(name='transcript_prediction')
    transcript_prediction.invoke('all', self._state_update % 'Transcript Prediction')

    # Input files
    features_counts = File('%s.feature.cnts' % self._prefix)
    gtf = File('%s.splice_candidates.gtf' % self._prefix)

    # Output files
    transcript_counts = File('%s.transcripts.cnts' % self._prefix)

    # Arguments
    transcript_prediction.addArguments(features_counts, '-g', gtf)

    # Uses
    transcript_prediction.setStdout(transcript_counts)
    transcript_prediction.uses(features_counts, link=Link.INPUT)
    transcript_prediction.uses(gtf, link=Link.INPUT)
    transcript_prediction.uses(transcript_counts, link=Link.OUTPUT, transfer=True, register=False)

    self.adag.addJob(transcript_prediction)
def add_aseg_generation_steps(self, dax, job_recon):
    lh_aseg = File(AsegFiles.LH_ASEG.value)
    lh_aseg_annot = File(AsegFiles.LH_ASEG_ANNOT.value)
    fs_lut = File(Inputs.FS_LUT.value)
    job5 = Job(AsegGenJobNames.ASEG_CONCAT.value)
    job5.addArguments(lh_aseg, lh_aseg_annot, self.lh_labels, fs_lut, self.subject)
    job5.uses(fs_lut, link=Link.INPUT)
    job5.uses(lh_aseg, link=Link.OUTPUT, transfer=True, register=True)
    job5.uses(lh_aseg_annot, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job5)

    rh_aseg = File(AsegFiles.RH_ASEG.value)
    rh_aseg_annot = File(AsegFiles.RH_ASEG_ANNOT.value)
    fs_lut = File(Inputs.FS_LUT.value)
    job6 = Job(AsegGenJobNames.ASEG_CONCAT.value)
    job6.addArguments(rh_aseg, rh_aseg_annot, self.rh_labels, fs_lut, self.subject)
    job6.uses(fs_lut, link=Link.INPUT)
    job6.uses(rh_aseg, link=Link.OUTPUT, transfer=True, register=True)
    job6.uses(rh_aseg_annot, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job6)

    lbl_list = map(int, self.lh_labels.strip('"').split() + self.rh_labels.strip('"').split())
    for aseg_label in lbl_list:
        aparc_aseg_mgz = File(T1Files.APARC_ASEG_MGZ.value % self.atlas_suffix)
        norm_mgz = File(T1Files.NORM_MGZ.value)
        aseg_mgz = File(AsegFiles.ASEG_MGZ.value % aseg_label)
        job1 = Job(AsegGenJobNames.MRI_PRETESS.value)
        job1.addArguments(aparc_aseg_mgz, str(aseg_label), norm_mgz, aseg_mgz)
        job1.uses(aparc_aseg_mgz, link=Link.INPUT)
        job1.uses(norm_mgz, link=Link.INPUT)
        job1.uses(aseg_mgz, link=Link.OUTPUT, transfer=False, register=False)
        dax.addJob(job1)
        dax.depends(job1, job_recon)

        aseg_not_smooth = File(AsegFiles.ASEG_NOT_SMOOTH.value % aseg_label)
        job2 = Job(AsegGenJobNames.MRI_TESSELLATE.value)
        job2.addArguments(aseg_mgz, str(aseg_label), aseg_not_smooth)
        job2.uses(aseg_mgz, link=Link.INPUT)
        job2.uses(aseg_not_smooth, link=Link.OUTPUT, transfer=False, register=False)
        dax.addJob(job2)
        dax.depends(job2, job1)

        aseg_not_smooth_main = File(AsegFiles.ASEG_NOT_SMOOTH_MAIN.value % aseg_label)
        job3 = Job(AsegGenJobNames.MRIS_EXTRACT.value)
        job3.addArguments(aseg_not_smooth, aseg_not_smooth_main)
        job3.uses(aseg_not_smooth, link=Link.INPUT)
        job3.uses(aseg_not_smooth_main, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job3)
        dax.depends(job3, job2)

        lh_aseg_lbl = File(AsegFiles.ASEG_LBL_LH.value % aseg_label)
        job4 = Job(AsegGenJobNames.MRIS_SMOOTH.value)
        job4.addArguments("-nw", aseg_not_smooth_main, lh_aseg_lbl)
        job4.uses(aseg_not_smooth_main, link=Link.INPUT)
        job4.uses(lh_aseg_lbl, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job4)
        dax.depends(job4, job3)

        aseg_lbl = File(AsegFiles.ASEG_LBL.value % aseg_label)
        job_rename = Job("mv")
        job_rename.addArguments(lh_aseg_lbl, aseg_lbl)
        job_rename.uses(lh_aseg_lbl, link=Link.INPUT)
        job_rename.uses(aseg_lbl, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job_rename)
        dax.depends(job_rename, job4)

        if aseg_label in map(int, self.lh_labels.strip('"').split()):
            job5.uses(aseg_lbl, link=Link.INPUT)
        else:
            job6.uses(aseg_lbl, link=Link.INPUT)
        dax.depends(job5, job_rename)
        dax.depends(job6, job_rename)

    lh_centered_aseg = File(AsegFiles.LH_CENTERED_ASEG.value)
    job7 = Job(T1JobNames.MRIS_CONVERT.value)
    job7.addArguments("--to-scanner", lh_aseg, lh_centered_aseg)
    job7.uses(lh_aseg, link=Link.INPUT)
    job7.uses(lh_centered_aseg, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job7)
    dax.depends(job7, job5)

    rh_centered_aseg = File(AsegFiles.RH_CENTERED_ASEG.value)
    job8 = Job(T1JobNames.MRIS_CONVERT.value)
    job8.addArguments("--to-scanner", rh_aseg, rh_centered_aseg)
    job8.uses(rh_aseg, link=Link.INPUT)
    job8.uses(rh_centered_aseg, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job8)
    dax.depends(job8, job6)

    return job7, job8
def add_sensor_model_steps(self, dax, job_source_model):
    # TODO: seeg positions file should contain only positions, not labels,
    # in order to work with OpenMEEG
    seeg_xyz = File(SEEGCompFiles.SEEG_XYZ.value)
    head_model_geom = File(HeadModelFiles.HEAD_MODEL_GEOM.value)
    head_model_cond = File(HeadModelFiles.HEAD_MODEL_COND.value)
    bem_tri_surfs = [
        File(HeadModelFiles.INNER_SKULL_SURFACE_LOW_TRI.value % self.subject),
        File(HeadModelFiles.OUTER_SKULL_SURFACE_LOW_TRI.value % self.subject),
        File(HeadModelFiles.OUTER_SKIN_SURFACE_LOW_TRI.value % self.subject),
        File(HeadModelFiles.BRAIN_SURFACE_LOW_TRI.value % self.subject)
    ]

    head2ipm_file = File(SensorModelFiles.SEEG_H2IPM.value)
    job1 = Job(HeadModelJobNames.OM_ASSEMBLE.value)
    job1.addArguments("-h2ipm", head_model_geom, head_model_cond, seeg_xyz, head2ipm_file)
    for surf in bem_tri_surfs:
        job1.uses(surf, link=Link.INPUT)
    job1.uses(head_model_geom, link=Link.INPUT)
    job1.uses(head_model_cond, link=Link.INPUT)
    job1.uses(seeg_xyz, link=Link.INPUT)
    job1.uses(head2ipm_file, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job1)
    dax.depends(job1, job_source_model)

    lh_white_dsm = File(SourceModelFiles.LH_WHITE_RESAMP_DSM.value % (self.trg_subject, self.atlas_suffix))
    lh_ds2ipm_file = File(SensorModelFiles.LH_DS2IPM.value % (self.trg_subject, self.atlas_suffix))
    job2 = Job(HeadModelJobNames.OM_ASSEMBLE.value)
    job2.addArguments("-ds2ipm", head_model_geom, head_model_cond, lh_white_dsm, seeg_xyz, lh_ds2ipm_file)
    for surf in bem_tri_surfs:
        job2.uses(surf, link=Link.INPUT)
    job2.uses(head_model_geom, link=Link.INPUT)
    job2.uses(head_model_cond, link=Link.INPUT)
    job2.uses(lh_white_dsm, link=Link.INPUT)
    job2.uses(seeg_xyz, link=Link.INPUT)
    job2.uses(lh_ds2ipm_file, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job2)
    dax.depends(job2, job1)

    rh_white_dsm = File(SourceModelFiles.RH_WHITE_RESAMP_DSM.value % (self.trg_subject, self.atlas_suffix))
    rh_ds2ipm_file = File(SensorModelFiles.RH_DS2IPM.value % (self.trg_subject, self.atlas_suffix))
    job3 = Job(HeadModelJobNames.OM_ASSEMBLE.value)
    job3.addArguments("-ds2ipm", head_model_geom, head_model_cond, rh_white_dsm, seeg_xyz, rh_ds2ipm_file)
    for surf in bem_tri_surfs:
        job3.uses(surf, link=Link.INPUT)
    job3.uses(head_model_geom, link=Link.INPUT)
    job3.uses(head_model_cond, link=Link.INPUT)
    job3.uses(rh_white_dsm, link=Link.INPUT)
    job3.uses(seeg_xyz, link=Link.INPUT)
    job3.uses(rh_ds2ipm_file, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job3)
    dax.depends(job3, job1)

    return job2, job3
def generate_dax(self, daxfile):
    from Pegasus.DAX3 import ADAG, Job, File, Link

    # The DAX generator
    dax = ADAG("pipeline")

    # Some bits of metadata. Should put plenty more here.
    dax.metadata("owner", self.pipeline.owner)
    dax.metadata("basename", self.pipeline.basename)
    dax.metadata("version", self.pipeline.version)

    # string tag -> pegasus File object mapping of all the
    # inputs and outputs used by any pipeline stage.
    files = {}

    # First generate the overall inputs to the pipeline,
    # i.e. ones that are not generated by any other stage
    # but must be specified at the start
    for tag in self.pipeline.input_tags():
        path = self.info['inputs'].get(tag)
        files[tag] = File(path)

    # Now go through the pipeline in sequence.
    for stage_name, stage_class in self.pipeline.sequence():
        # The stage in the pipeline. We describe the meaning of it
        # (which image it corresponds to) in the transformation
        # catalog generation
        job = Job(stage_name, id=stage_name)

        # Configuration files for this job.
        # These will not be built during the pipeline and must be
        # provided by the user
        for config_tag, config_filename in stage_class.config.items():
            filename = self.pipeline.cfg[stage_name]['config'][config_tag]
            config_path = os.path.join(self.config_dir(), filename)
            config = File(config_path)
            job.uses(config, link=Link.INPUT)

        # Input files for the job, either created by the user or by previous
        # stages. In either case they should be in the "files" dictionary, because
        # precursor jobs will have been added before this one.
        for input_tag in stage_class.inputs.keys():
            job.uses(files[input_tag], link=Link.INPUT)

        # Output files from the job. These will be created by the job
        # and used by future jobs
        for output_tag, output_type in stage_class.outputs.items():
            output_filename = "{}.{}".format(output_tag, output_type)
            output = File(output_filename)
            job.uses(output, link=Link.OUTPUT, transfer=True, register=True)
            files[output_tag] = output

        # Add this job to the pipeline
        dax.addJob(job)

        # Tell pegasus which jobs this one depends on.
        # The pipeline already knows this information.
        # The pipeline.sequence command runs through the jobs in an order
        # that guarantees that a job's predecessors are always done before
        # it is, so they will always exist in the dax by this point.
        for predecessor_name in self.pipeline.dependencies(stage_name):
            dax.depends(stage_name, predecessor_name)

    # Generate the final DAX XML file.
    dax.writeXML(open(daxfile, "w"))
def write(self, filename, name='dax'):
    """Generate Pegasus abstract workflow (DAX).

    Parameters
    ----------
    filename : `str`
        File to write the DAX to.
    name : `str`, optional
        Name of the DAX.

    Returns
    -------
    `Pegasus.ADAG`
        Abstract workflow used by Pegasus' planner.
    """
    dax = ADAG(name)

    # Add files to DAX-level replica catalog.
    catalog = {}
    for file_id in self.files:
        attrs = self.graph.node[file_id]
        f = File(attrs['lfn'])

        # Add physical file names, if any.
        urls = attrs.get('urls')
        if urls is not None:
            sites = attrs.get('sites')
            if sites is None:
                # One 'local' entry per URL (the original counted characters
                # of the comma-separated string rather than URLs).
                sites = ','.join(len(urls.split(',')) * ['local'])
            for url, site in zip(urls.split(','), sites.split(',')):
                f.addPFN(PFN(url, site))

        catalog[attrs['lfn']] = f
        dax.addFile(f)

    # Add jobs to the DAX.
    for task_id in self.tasks:
        attrs = self.graph.node[task_id]
        job = Job(name=attrs['name'], id=task_id)

        # Add job command line arguments replacing any file name with
        # respective Pegasus file object.
        args = attrs.get('args')
        if args is not None and args:
            args = args.split()
            lfns = list(set(catalog) & set(args))
            if lfns:
                indices = [args.index(lfn) for lfn in lfns]
                for idx, lfn in zip(indices, lfns):
                    args[idx] = catalog[lfn]
            job.addArguments(*args)

        # Specify job's inputs.
        inputs = [file_id for file_id in self.graph.predecessors(task_id)]
        for file_id in inputs:
            attrs = self.graph.node[file_id]
            f = catalog[attrs['lfn']]
            job.uses(f, link=Link.INPUT)

        # Specify job's outputs.
        outputs = [file_id for file_id in self.graph.successors(task_id)]
        for file_id in outputs:
            attrs = self.graph.node[file_id]
            f = catalog[attrs['lfn']]
            job.uses(f, link=Link.OUTPUT)

            streams = attrs.get('streams')
            if streams is not None:
                if streams & 1 != 0:
                    job.setStdout(f)
                if streams & 2 != 0:
                    job.setStderr(f)

        dax.addJob(job)

    # Add job dependencies to the DAX.
    for task_id in self.tasks:
        parents = set()
        for file_id in self.graph.predecessors(task_id):
            parents.update(self.graph.predecessors(file_id))
        for parent_id in parents:
            dax.depends(parent=dax.getJob(parent_id), child=dax.getJob(task_id))

    # Finally, write down the workflow in DAX format and return it,
    # as documented above.
    with open(filename, 'w') as f:
        dax.writeXML(f)
    return dax
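# --- Graph-shape sketch (hypothetical; attribute names taken from write() above) ---
# write() walks a bipartite file/task graph. A minimal sketch of the structures it
# expects, assuming self.graph is a networkx DiGraph (old .node attribute API,
# networkx < 2.4) and self.files / self.tasks list the node ids of each kind.
import networkx as nx

graph = nx.DiGraph()
graph.add_node('f1', lfn='input.txt', urls='file:///tmp/input.txt', sites='local')
graph.add_node('t1', name='transform', args='input.txt output.txt')
graph.add_node('f2', lfn='output.txt', streams=1)   # bit 1 set: capture stdout
graph.add_edge('f1', 't1')   # file consumed by task
graph.add_edge('t1', 'f2')   # file produced by task
files, tasks = ['f1', 'f2'], ['t1']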
def _add_fs_steps(self, dax, job_b0, job_t1, job_aparc_aseg):
    b0_nii_gz = File(DWIFiles.B0_NII_GZ.value)
    b0_in_t1_mgz = File(CoregFiles.B0_IN_T1_MGZ.value)
    d2t_reg = File("d2t.reg")
    d2t_lta = File("d2t.lta")
    d2t_mat = File("d2t.mat")
    job1 = Job(CoregJobNames.BBREGISTER.value)
    job1.addArguments(self.subject, b0_nii_gz, b0_in_t1_mgz, d2t_reg, d2t_lta, d2t_mat)
    job1.uses(b0_nii_gz, link=Link.INPUT)
    job1.uses(b0_in_t1_mgz, link=Link.OUTPUT, transfer=False, register=False)
    job1.uses(d2t_reg, link=Link.OUTPUT, transfer=False, register=False)
    job1.uses(d2t_lta, link=Link.OUTPUT, transfer=False, register=False)
    job1.uses(d2t_mat, link=Link.OUTPUT, transfer=False, register=False)
    dax.addJob(job1)
    dax.depends(job1, job_b0)

    b0_in_t1_nii_gz = File(CoregFiles.B0_IN_T1.value)
    job2 = Job(T1JobNames.MRI_CONVERT.value)
    job2.addArguments(b0_in_t1_mgz, b0_in_t1_nii_gz, "--out_orientation", "RAS")
    job2.uses(b0_in_t1_mgz, link=Link.INPUT)
    job2.uses(b0_in_t1_nii_gz, link=Link.OUTPUT, transfer=False, register=False)
    dax.addJob(job2)
    dax.depends(job2, job1)

    # self.qc_snapshots.add_2vols_snapshot_step(dax, [job1], t1_nii_gz, b0_in_t1_nii_gz)

    t1_mgz = File(T1Files.T1_MGZ.value)
    t1_in_d_nii_gz = File(CoregFiles.T1_IN_D.value)
    t1_in_d_lta = File(CoregFiles.T1_IN_D.value + ".lta")
    job3 = Job(CoregJobNames.MRI_VOL2VOL.value)
    job3.addArguments("--mov", t1_mgz, "--targ", b0_nii_gz, "--o", t1_in_d_nii_gz,
                      "--lta-inv", d2t_lta, "--save-reg")
    job3.uses(t1_mgz, link=Link.INPUT)
    job3.uses(b0_nii_gz, link=Link.INPUT)
    job3.uses(d2t_lta, link=Link.INPUT)
    job3.uses(t1_in_d_lta, link=Link.OUTPUT, transfer=False, register=False)
    job3.uses(t1_in_d_nii_gz, link=Link.OUTPUT, transfer=False, register=False)
    dax.addJob(job3)
    dax.depends(job3, job_t1)
    dax.depends(job3, job2)

    # self.qc_snapshots.add_2vols_snapshot_step(dax, [job3], b0_nii_gz, t1_in_d_nii_gz)

    aparc_aseg_mgz = File(T1Files.APARC_ASEG_MGZ.value % self.atlas_suffix)
    aparc_aseg_in_d_nii_gz = File(CoregFiles.APARC_ASEG_IN_D.value % self.atlas_suffix)
    job4 = Job(CoregJobNames.MRI_VOL2VOL.value)
    job4.addArguments("--mov", aparc_aseg_mgz, "--targ", b0_nii_gz, "--o", aparc_aseg_in_d_nii_gz,
                      "--reg", t1_in_d_lta, "--nearest")
    job4.uses(aparc_aseg_mgz, link=Link.INPUT)
    job4.uses(b0_nii_gz, link=Link.INPUT)
    job4.uses(t1_in_d_lta, link=Link.INPUT)
    job4.uses(aparc_aseg_in_d_nii_gz, link=Link.OUTPUT, transfer=False, register=False)
    dax.addJob(job4)
    dax.depends(job4, job_aparc_aseg)
    dax.depends(job4, job3)

    # self.qc_snapshots.add_2vols_snapshot_step(dax, [job4], b0_nii_gz, aparc_aseg_in_d_nii_gz)
    # self.qc_snapshots.add_3vols_snapshot_step(dax, [job3, job4], t1_in_d_nii_gz, b0_nii_gz,
    #                                           aparc_aseg_in_d_nii_gz)

    return job3, job4
def _analyze(self):
    analyze = Job(name='analyze_samfile')
    analyze.invoke('all', self._state_update % 'Analyzing SAM file')

    # Input files
    sam_file = File('%s.sam' % self._prefix)

    # Output files
    genes_counts = File('%s.gene.cnts' % self._prefix)
    features_counts = File('%s.feature.cnts' % self._prefix)
    ambiguous_genes_counts = File('%s.ambiguousGenes.cnts' % self._prefix)
    overlap_genes_counts = File('%s.overlapGene.cnts' % self._prefix)
    summary_out = File('%s.summary.out' % self._prefix)

    # Arguments
    analyze.addArguments(sam_file, '--prefix', self._prefix)

    # Uses
    analyze.uses(sam_file, link=Link.INPUT)
    analyze.uses(genes_counts, link=Link.OUTPUT, transfer=True, register=False)
    analyze.uses(features_counts, link=Link.OUTPUT, transfer=True, register=False)
    analyze.uses(ambiguous_genes_counts, link=Link.OUTPUT, transfer=True, register=False)
    analyze.uses(overlap_genes_counts, link=Link.OUTPUT, transfer=True, register=False)
    analyze.uses(summary_out, link=Link.OUTPUT, transfer=True, register=False)

    self.adag.addJob(analyze)
def add_source_model_steps(self, dax, job_head_model, job_mapping_details):
    t1_mgz = File(T1Files.T1_MGZ.value)
    lh_white = File(T1Files.LH_WHITE.value)
    rh_white = File(T1Files.RH_WHITE.value)
    whites = [lh_white, rh_white]

    head_model_geom = File(HeadModelFiles.HEAD_MODEL_GEOM.value)
    head_model_cond = File(HeadModelFiles.HEAD_MODEL_COND.value)
    bem_tri_surfs = [
        File(HeadModelFiles.INNER_SKULL_SURFACE_LOW_TRI.value % self.subject),
        File(HeadModelFiles.OUTER_SKULL_SURFACE_LOW_TRI.value % self.subject),
        File(HeadModelFiles.OUTER_SKIN_SURFACE_LOW_TRI.value % self.subject),
        File(HeadModelFiles.BRAIN_SURFACE_LOW_TRI.value % self.subject)
    ]

    lh_white_resamp = File(SourceModelFiles.LH_WHITE_RESAMP.value % self.trg_subject)
    rh_white_resamp = File(SourceModelFiles.RH_WHITE_RESAMP.value % self.trg_subject)
    whites_resamp = [lh_white_resamp, rh_white_resamp]

    lh_white_tri = File(SourceModelFiles.LH_WHITE_RESAMP_TRI.value % self.trg_subject)
    rh_white_tri = File(SourceModelFiles.RH_WHITE_RESAMP_TRI.value % self.trg_subject)
    whites_resamp_tri = [lh_white_tri, rh_white_tri]

    lh_white_ssm = File(SourceModelFiles.LH_WHITE_RESAMP_SSM.value % self.trg_subject)
    rh_white_ssm = File(SourceModelFiles.RH_WHITE_RESAMP_SSM.value % self.trg_subject)
    whites_resamp_ssm = [lh_white_ssm, rh_white_ssm]

    lh_dipoles_file = File(AsegFiles.LH_DIPOLES_TXT.value % self.atlas_suffix)
    rh_dipoles_file = File(AsegFiles.RH_DIPOLES_TXT.value % self.atlas_suffix)
    dipoles_files = [lh_dipoles_file, rh_dipoles_file]

    lh_white_dsm = File(SourceModelFiles.LH_WHITE_RESAMP_DSM.value % (self.trg_subject, self.atlas_suffix))
    rh_white_dsm = File(SourceModelFiles.RH_WHITE_RESAMP_DSM.value % (self.trg_subject, self.atlas_suffix))
    whites_resamp_dsm = [lh_white_dsm, rh_white_dsm]

    last_job = None
    for idx, hemi in enumerate(["lh", "rh"]):
        job1 = Job(ResamplingJobNames.MRI_SURF2SURF.value)
        job1.addArguments("--srcsubject", self.subject, "--trgsubject", self.trg_subject,
                          "--hemi", hemi, "--sval-xyz", "white",
                          "--tval", "white-%s" % self.trg_subject, "--tval-xyz", t1_mgz)
        job1.uses(t1_mgz, link=Link.INPUT)
        job1.uses(whites[idx], link=Link.INPUT)
        job1.uses(whites_resamp[idx], link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job1)
        dax.depends(job1, job_head_model)

        job2 = Job(HeadModelJobNames.CONVERT_TO_BRAIN_VISA.value)
        job2.addArguments(whites_resamp[idx], whites_resamp_tri[idx], self.subject)
        job2.uses(whites_resamp[idx], link=Link.INPUT)
        job2.uses(whites_resamp_tri[idx], link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job2)
        dax.depends(job2, job1)

        job3 = Job(HeadModelJobNames.OM_ASSEMBLE.value)
        job3.addArguments("-SurfSourceMat", head_model_geom, head_model_cond,
                          whites_resamp_tri[idx], whites_resamp_ssm[idx])
        for surf in bem_tri_surfs:
            job3.uses(surf, link=Link.INPUT)
        job3.uses(head_model_geom, link=Link.INPUT)
        job3.uses(head_model_cond, link=Link.INPUT)
        job3.uses(whites_resamp_tri[idx], link=Link.INPUT)
        job3.uses(whites_resamp_ssm[idx], link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job3)
        dax.depends(job3, job2)

        job4 = Job(HeadModelJobNames.OM_ASSEMBLE.value)
        job4.addArguments("-DipSourceMat", head_model_geom, head_model_cond,
                          dipoles_files[idx], whites_resamp_dsm[idx])
        for surf in bem_tri_surfs:
            job4.uses(surf, link=Link.INPUT)
        job4.uses(head_model_geom, link=Link.INPUT)
        job4.uses(head_model_cond, link=Link.INPUT)
        job4.uses(dipoles_files[idx], link=Link.INPUT)
        job4.uses(whites_resamp_dsm[idx], link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job4)
        dax.depends(job4, job_mapping_details)
        dax.depends(job4, job3)

        last_job = job4

    return last_job
def add_dwi_processing_steps(self, dax):
    last_job = None
    dwi_input = File(Inputs.DWI_INPUT.value)

    if self.use_gradient == "True":
        dwi_input_no_gradient = File(Inputs.DWI_INPUT_NO_GRAD.value)
        bvec_input = File(Inputs.DWI_BVEC.value)
        bval_input = File(Inputs.DWI_BVAL.value)
        job_gradient = Job(DWIJobNames.MRCONVERT.value)
        job_gradient.addArguments(dwi_input_no_gradient, "-fsl", bvec_input, bval_input, dwi_input)
        job_gradient.uses(dwi_input_no_gradient, link=Link.INPUT)
        job_gradient.uses(bvec_input, link=Link.INPUT)
        job_gradient.uses(bval_input, link=Link.INPUT)
        job_gradient.uses(dwi_input, link=Link.OUTPUT, transfer=True, register=True)
        last_job = job_gradient
        dax.addJob(job_gradient)

    dwi_conv_output = None
    if self.dwi_reversed == "True":
        job1 = None
        job2 = None
        if self.dwi_format != "mif":
            if self.dwi_format == "dicom":
                # TODO: is mrconvert interactive for reversed acquisition data?
                # Should we use the next lines?
                # mrchoose 0 mrconvert $DATA/DWI ./dwi_raw.mif -force
                # mrchoose 1 mrconvert $DATA/DWI ./dwi_raw_re.mif -force
                print("Not implemented!")
            else:
                dwi_conv_output = File(DWIFiles.DWI_RAW_MIF.value)
                job1 = Job(DWIJobNames.MRCONVERT.value, node_label="Convert DWI to MIF")
                job1.addArguments(dwi_input, dwi_conv_output)
                job1.uses(dwi_input, link=Link.INPUT)
                job1.uses(dwi_conv_output, link=Link.OUTPUT, transfer=False, register=False)
                dax.addJob(job1)
                if last_job is not None:
                    dax.depends(job1, last_job)

        dwi_re_input = File(DWIFiles.DWI_RE_NII_GZ.value)
        dwi_re = File(DWIFiles.DWI_RE_MIF.value)
        job2 = Job(DWIJobNames.MRCONVERT.value, node_label="Convert DWI_RE to MIF")
        job2.addArguments(dwi_re_input, dwi_re)
        job2.uses(dwi_re_input, link=Link.INPUT)
        job2.uses(dwi_re, link=Link.OUTPUT, transfer=True, register=False)
        dax.addJob(job2)

        dwi_pre_output = File(DWIFiles.DWI_MIF.value)
        job3 = Job(DWIJobNames.DWIPREPROC.value, node_label="DWI preprocessing")
        if self.os == "LINUX":
            job3.addArguments(dwi_conv_output, dwi_pre_output, "-pe_dir", self.dwi_pe_dir,
                              "-rpe_pair", dwi_conv_output, dwi_re, "-nthreads", self.mrtrix_threads)
        else:
            job3.addArguments(self.dwi_pe_dir, dwi_conv_output, dwi_pre_output,
                              "-rpe_pair", dwi_conv_output, dwi_re, "-nthreads", self.mrtrix_threads)
        job3.uses(dwi_conv_output, link=Link.INPUT)
        job3.uses(dwi_re, link=Link.INPUT)
        job3.uses(dwi_pre_output, link=Link.OUTPUT, transfer=False, register=False)
        dax.addJob(job3)
        if job1 is not None:
            dax.depends(job3, job1)
        if job2 is not None:
            dax.depends(job3, job2)
        last_job = job3
    else:
        job1 = None
        if self.dwi_format != "mif" and self.use_gradient != "True":
            dwi_conv_output = File(DWIFiles.DWI_RAW_MIF.value)
            job1 = Job(DWIJobNames.MRCONVERT.value, node_label="Convert DWI to MIF")
            job1.addArguments(dwi_input, dwi_conv_output)
            job1.uses(dwi_input, link=Link.INPUT)
            job1.uses(dwi_conv_output, link=Link.OUTPUT, transfer=False, register=False)
            dax.addJob(job1)
            if last_job is not None:
                dax.depends(job1, last_job)

        if dwi_conv_output is None:
            dwi_conv_output = dwi_input

        dwi_pre_output = File(DWIFiles.DWI_MIF.value)
        job2 = Job(DWIJobNames.DWIPREPROC.value, node_label="DWI preprocessing")
        if self.os == "LINUX":
            job2.addArguments(dwi_conv_output, dwi_pre_output, "-pe_dir", self.dwi_pe_dir,
                              "-rpe_none", "-nthreads", self.mrtrix_threads)
        else:
            job2.addArguments(self.dwi_pe_dir, dwi_conv_output, dwi_pre_output,
                              "-rpe_none", "-nthreads", self.mrtrix_threads)
        job2.uses(dwi_conv_output, link=Link.INPUT)
        job2.uses(dwi_pre_output, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job2)
        if job1 is not None:
            dax.depends(job2, job1)
        last_job = job2

    mask_output = File(DWIFiles.MASK_MIF.value)
    job3 = Job(DWIJobNames.DWI2MASK.value, node_label="Create DWI mask")
    job3.addArguments(dwi_pre_output, mask_output, "-nthreads", self.mrtrix_threads)
    job3.uses(dwi_pre_output, link=Link.INPUT)
    job3.uses(mask_output, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job3)
    dax.depends(job3, last_job)

    b0_output = File(DWIFiles.B0_NII_GZ.value)
    job4 = Job(DWIJobNames.DWIEXTRACT.value, node_label="Extract DWI B0")
    job4.addArguments(dwi_pre_output, b0_output)
    job4.uses(dwi_pre_output, link=Link.INPUT)
    job4.uses(b0_output, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job4)
    dax.depends(job4, last_job)

    file_mask_nii_gz = File(DWIFiles.MASK_NII_GZ.value)
    job_convert_mask = Job(DWIJobNames.MRCONVERT.value)
    job_convert_mask.addArguments(mask_output, file_mask_nii_gz)
    job_convert_mask.uses(mask_output, link=Link.INPUT)
    job_convert_mask.uses(file_mask_nii_gz, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job_convert_mask)
    dax.depends(job_convert_mask, job3)

    self.qc_snapshots.add_2vols_snapshot_step(dax, [job_convert_mask, job4],
                                              file_mask_nii_gz, b0_output)

    return job4, job3
def generate_workflow(self): "Generate a workflow (DAX, config files, and replica catalog)" ts = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') dax = ADAG("mgrast-prod-%s" % ts) # These are all the global input files for the workflow metagenome = File(self.mgfile) self.add_replica(self.mgfile, os.path.abspath(self.mgfile)) # QC job qcJob = Job("wrapper-qc", node_label="wrapper-qc") qcJob.addArguments("-input", self.mgfile) qcJob.addArguments("-format", self.file_format) qcJob.addArguments("-out_prefix", "075") qcJob.addArguments("-assembled", self.assembled) qcJob.addArguments("-filter_options", self.filter_options) qcJob.addArguments("-proc", "8") qcJob.uses(metagenome, link=Link.INPUT) qcJob.uses("075.assembly.coverage", link=Link.OUTPUT, transfer=False) qcJob.uses("075.qc.stats", link=Link.OUTPUT, transfer=False) qcJob.uses("075.upload.stats", link=Link.OUTPUT, transfer=False) qcJob.profile("globus", "maxwalltime", "60") qcJob.profile("globus", "hostcount", "8") qcJob.profile("globus", "count", "8") dax.addJob(qcJob) # Preprocess Job preprocessJob = Job("wrapper-preprocess", node_label="wrapper-preprocess") preprocessJob.addArguments("-input", self.mgfile) preprocessJob.addArguments("-format", self.file_format) preprocessJob.addArguments("-out_prefix", "100.preprocess") preprocessJob.addArguments("-filter_options", self.filter_options) preprocessJob.uses(metagenome, link=Link.INPUT) preprocessJob.uses("100.preprocess.passed.fna", link=Link.OUTPUT, transfer=False) preprocessJob.uses("100.preprocess.removed.fna", link=Link.OUTPUT, transfer=False) preprocessJob.profile("globus", "maxwalltime", "20") dax.addJob(preprocessJob) # Dereplicate Job dereplicateJob = Job("wrapper-dereplicate", node_label="wrapper-dereplicate") dereplicateJob.addArguments("-input=100.preprocess.passed.fna") dereplicateJob.addArguments("-out_prefix=150.dereplication") dereplicateJob.addArguments("-prefix_length=%s" % self.prefix_length) dereplicateJob.addArguments("-dereplicate=%s" % self.dereplicate) dereplicateJob.addArguments("-memory=10") dereplicateJob.uses("100.preprocess.passed.fna", link=Link.INPUT) dereplicateJob.uses("150.dereplication.passed.fna", link=Link.OUTPUT, transfer=False) dereplicateJob.uses("150.dereplication.removed.fna", link=Link.OUTPUT, transfer=False) dereplicateJob.profile("globus", "maxwalltime", "10") dax.addJob(dereplicateJob) dax.depends(dereplicateJob, preprocessJob) # Bowtie Screen Job bowtieJob = Job("wrapper-bowtie-screen", node_label="wrapper-bowtie-screen") bowtieJob.addArguments("-input=150.dereplication.passed.fna") bowtieJob.addArguments("-output=299.screen.passed.fna") bowtieJob.addArguments("-index=%s" % self.screen_indexes) bowtieJob.addArguments("-bowtie=%s" % self.bowtie) bowtieJob.addArguments("-proc=8") bowtieJob.uses("150.dereplication.passed.fna", link=Link.INPUT) bowtieJob.uses("299.screen.passed.fna", link=Link.OUTPUT, transfer=False) bowtieJob.profile("globus", "maxwalltime", "30") bowtieJob.profile("globus", "hostcount", "8") bowtieJob.profile("globus", "count", "8") dax.addJob(bowtieJob) dax.depends(bowtieJob, dereplicateJob) # Genecalling Job geneJob = Job("wrapper-genecalling", node_label="wrapper-genecalling") geneJob.addArguments("-input=299.screen.passed.fna") geneJob.addArguments("-out_prefix=350.genecalling.coding") geneJob.addArguments("-type=%s" % self.fgs_type) geneJob.addArguments("-size=100") geneJob.addArguments("-proc=8") geneJob.uses("299.screen.passed.fna", link=Link.INPUT) geneJob.uses("350.genecalling.coding.faa", link=Link.OUTPUT, transfer=False) 
geneJob.uses("350.genecalling.coding.fna", link=Link.OUTPUT, transfer=False) geneJob.profile("globus", "maxwalltime", "30") geneJob.profile("globus", "hostcount", "8") geneJob.profile("globus", "count", "8") dax.addJob(geneJob) dax.depends(geneJob, bowtieJob) # Cluster (Genecalling) Job cluster1Job = Job("wrapper-cluster", node_label="wrapper-cluster") cluster1Job.addArguments("-input=350.genecalling.coding.faa") cluster1Job.addArguments("-out_prefix=550.cluster") cluster1Job.addArguments("-aa") cluster1Job.addArguments("-pid=%s" % self.aa_pid) cluster1Job.addArguments("-memory=20") cluster1Job.uses("350.genecalling.coding.faa", link=Link.INPUT) cluster1Job.uses("550.cluster.aa%s.faa" % self.aa_pid, link=Link.OUTPUT, transfer=False) cluster1Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.OUTPUT, transfer=False) cluster1Job.profile("globus", "maxwalltime", "10") dax.addJob(cluster1Job) dax.depends(cluster1Job, geneJob) # Blat_prot Job blatprotJob = Job("wrapper-blat-prot", node_label="wrapper-blat-prot") blatprotJob.addArguments("--input=550.cluster.aa%s.faa" % self.aa_pid) blatprotJob.addArguments("--output=650.superblat.sims") blatprotJob.uses("550.cluster.aa%s.faa" % self.aa_pid, link=Link.INPUT) blatprotJob.uses("650.superblat.sims", link=Link.OUTPUT, transfer=False) blatprotJob.profile("globus", "maxwalltime", "2880") blatprotJob.profile("globus", "hostcount", "24") blatprotJob.profile("globus", "count", "24") dax.addJob(blatprotJob) dax.depends(blatprotJob, cluster1Job) # Annotate Sims (Blat Prod) Job annotatesims1Job = Job("wrapper-annotate-sims", node_label="wrapper-annotate-sims") annotatesims1Job.addArguments("-input=650.superblat.sims") annotatesims1Job.addArguments("-out_prefix=650") annotatesims1Job.addArguments("-aa") annotatesims1Job.addArguments("-ach_ver=%s" % self.ach_annotation_ver) annotatesims1Job.addArguments("-ann_file=m5nr_v1.bdb") annotatesims1Job.uses("650.superblat.sims", link=Link.INPUT) annotatesims1Job.uses("650.aa.sims.filter", link=Link.OUTPUT, transfer=False) annotatesims1Job.uses("650.aa.expand.protein", link=Link.OUTPUT, transfer=False) annotatesims1Job.uses("650.aa.expand.lca", link=Link.OUTPUT, transfer=False) annotatesims1Job.uses("650.aa.expand.ontology", link=Link.OUTPUT, transfer=False) annotatesims1Job.profile("globus", "maxwalltime", "720") dax.addJob(annotatesims1Job) dax.depends(annotatesims1Job, blatprotJob) # Search RNA Job searchJob = Job("wrapper-search-rna", node_label="wrapper-search-rna") searchJob.addArguments("-input=100.preprocess.passed.fna") searchJob.addArguments("-output=425.search.rna.fna") searchJob.addArguments("-rna_nr=%s" % self.m5rna_clust) searchJob.addArguments("-size=100") searchJob.addArguments("-proc=8") searchJob.uses("100.preprocess.passed.fna", link=Link.INPUT) searchJob.uses("425.search.rna.fna", link=Link.OUTPUT, transfer=False) searchJob.profile("globus", "maxwalltime", "120") searchJob.profile("globus", "hostcount", "8") searchJob.profile("globus", "count", "8") dax.addJob(searchJob) dax.depends(searchJob, preprocessJob) # CLuster (Search RNA) Job cluster2Job = Job("wrapper-cluster", node_label="wrapper-cluster") cluster2Job.addArguments("-input=425.search.rna.fna") cluster2Job.addArguments("-out_prefix=440.cluster") cluster2Job.addArguments("-rna") cluster2Job.addArguments("-pid=%s" % self.rna_pid) cluster2Job.addArguments("-memory=20") cluster2Job.uses("425.search.rna.fna", link=Link.INPUT) cluster2Job.uses("440.cluster.rna%s.fna" % self.rna_pid, link=Link.OUTPUT, transfer=False) 
cluster2Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.OUTPUT, transfer=False) cluster2Job.profile("globus", "maxwalltime", "30") dax.addJob(cluster2Job) dax.depends(cluster2Job, searchJob) # Blat_rna Job blatrnaJob = Job("wrapper-blat-rna", node_label="wrapper-blat-rna") blatrnaJob.addArguments("--input=440.cluster.rna%s.fna" % self.rna_pid) blatrnaJob.addArguments("-rna_nr=m5rna") blatrnaJob.addArguments("--output=450.rna.sims") blatrnaJob.addArguments("-assembled=%s" % self.assembled) blatrnaJob.uses("440.cluster.rna%s.fna" % self.rna_pid, link=Link.INPUT) blatrnaJob.uses("450.rna.sims", link=Link.OUTPUT, transfer=False) blatrnaJob.profile("globus", "maxwalltime", "20") dax.addJob(blatrnaJob) dax.depends(blatrnaJob, cluster2Job) # Annotate Sims (Blat RNA) Job annotatesims2Job = Job("wrapper-annotate-sims", node_label="wrapper-annotate-sims") annotatesims2Job.addArguments("-input=450.rna.sims") annotatesims2Job.addArguments("-out_prefix=450") annotatesims2Job.addArguments("-rna") annotatesims2Job.addArguments("-ach_ver=%s" % self.ach_annotation_ver) annotatesims2Job.addArguments("-ann_file=m5nr_v1.bdb") annotatesims2Job.uses("450.rna.sims", link=Link.INPUT) annotatesims2Job.uses("450.rna.sims.filter", link=Link.OUTPUT, transfer=False) annotatesims2Job.uses("450.rna.expand.rna", link=Link.OUTPUT, transfer=False) annotatesims2Job.uses("450.rna.expand.lca", link=Link.OUTPUT, transfer=False) annotatesims2Job.profile("globus", "maxwalltime", "30") dax.addJob(annotatesims2Job) dax.depends(annotatesims2Job, blatrnaJob) # Index Sim Seq Job indexJob = Job("wrapper-index", node_label="wrapper-index") indexJob.addArguments("-in_seqs=350.genecalling.coding.fna") indexJob.addArguments("-in_seqs=425.search.rna.fna") indexJob.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid) indexJob.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid) indexJob.addArguments("-in_sims=650.aa.sims.filter") indexJob.addArguments("-in_sims=450.rna.sims.filter") indexJob.addArguments("-output=700.annotation.sims.filter.seq") indexJob.addArguments("-ach_ver=%s" % self.ach_annotation_ver) indexJob.addArguments("-memory=10") indexJob.addArguments("-ann_file=m5nr_v1.bdb") indexJob.uses("350.genecalling.coding.fna", link=Link.INPUT) indexJob.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT) indexJob.uses("650.aa.sims.filter", link=Link.INPUT) indexJob.uses("425.search.rna.fna", link=Link.INPUT) indexJob.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT) indexJob.uses("450.rna.sims.filter", link=Link.INPUT) indexJob.uses("700.annotation.sims.filter.seq", link=Link.OUTPUT, transfer=False) indexJob.uses("700.annotation.sims.filter.seq.index", link=Link.OUTPUT, transfer=False) indexJob.profile("globus", "maxwalltime", "120") dax.addJob(indexJob) dax.depends(indexJob, geneJob) dax.depends(indexJob, cluster1Job) dax.depends(indexJob, cluster2Job) dax.depends(indexJob, searchJob) dax.depends(indexJob, annotatesims1Job) # Annotate Summary Job (13) summary13Job = Job("wrapper-summary", node_label="wrapper-summary") summary13Job.addArguments("-job=1") summary13Job.addArguments("-in_expand=650.aa.expand.protein") summary13Job.addArguments("-in_expand=450.rna.expand.rna") summary13Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid) summary13Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid) summary13Job.addArguments("-in_assemb=075.assembly.coverage") summary13Job.addArguments("-in_index=700.annotation.sims.filter.seq.index") 
summary13Job.addArguments("-output=700.annotation.md5.summary") summary13Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver) summary13Job.addArguments("-type=md5") summary13Job.uses("075.assembly.coverage", link=Link.INPUT) summary13Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT) summary13Job.uses("650.aa.expand.protein", link=Link.INPUT) summary13Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT) summary13Job.uses("450.rna.expand.rna", link=Link.INPUT) summary13Job.uses("700.annotation.sims.filter.seq.index", link=Link.INPUT) summary13Job.uses("700.annotation.md5.summary", link=Link.OUTPUT, transfer=True) summary13Job.profile("globus", "maxwalltime", "30") dax.addJob(summary13Job) dax.depends(summary13Job, qcJob) dax.depends(summary13Job, cluster1Job) dax.depends(summary13Job, cluster2Job) dax.depends(summary13Job, indexJob) dax.depends(summary13Job, annotatesims1Job) dax.depends(summary13Job, annotatesims2Job) # Annotate Summary Job (14) summary14Job = Job("wrapper-summary", node_label="wrapper-summary") summary14Job.addArguments("-job=1") summary14Job.addArguments("-in_expand=650.aa.expand.protein") summary14Job.addArguments("-in_expand=450.rna.expand.rna") summary14Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid) summary14Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid) summary14Job.addArguments("-in_assemb=075.assembly.coverage") summary14Job.addArguments("-output=700.annotation.function.summary") summary14Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver) summary14Job.addArguments("-type=function") summary14Job.uses("075.assembly.coverage", link=Link.INPUT) summary14Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT) summary14Job.uses("650.aa.expand.protein", link=Link.INPUT) summary14Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT) summary14Job.uses("450.rna.expand.rna", link=Link.INPUT) summary14Job.uses("700.annotation.function.summary", link=Link.OUTPUT, transfer=True) summary14Job.profile("globus", "maxwalltime", "30") dax.addJob(summary14Job) dax.depends(summary14Job, qcJob) dax.depends(summary14Job, cluster1Job) dax.depends(summary14Job, cluster2Job) dax.depends(summary14Job, annotatesims1Job) dax.depends(summary14Job, annotatesims2Job) # Annotate Summary Job (15) summary15Job = Job("wrapper-summary", node_label="wrapper-summary") summary15Job.addArguments("-job=1") summary15Job.addArguments("-in_expand=650.aa.expand.protein") summary15Job.addArguments("-in_expand=450.rna.expand.rna") summary15Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid) summary15Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid) summary15Job.addArguments("-in_assemb=075.assembly.coverage") summary15Job.addArguments("-output=700.annotation.organism.summary") summary15Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver) summary15Job.addArguments("-type=organism") summary15Job.uses("075.assembly.coverage", link=Link.INPUT) summary15Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT) summary15Job.uses("650.aa.expand.protein", link=Link.INPUT) summary15Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT) summary15Job.uses("450.rna.expand.rna", link=Link.INPUT) summary15Job.uses("700.annotation.organism.summary", link=Link.OUTPUT, transfer=True) summary15Job.profile("globus", "maxwalltime", "30") dax.addJob(summary15Job) dax.depends(summary15Job, qcJob) dax.depends(summary15Job, cluster1Job) 
dax.depends(summary15Job, cluster2Job) dax.depends(summary15Job, annotatesims1Job) dax.depends(summary15Job, annotatesims2Job) # Annotate Summary Job (16) summary16Job = Job("wrapper-summary", node_label="wrapper-summary") summary16Job.addArguments("-job=1") summary16Job.addArguments("-in_expand=650.aa.expand.lca") summary16Job.addArguments("-in_expand=450.rna.expand.lca") summary16Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid) summary16Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid) summary16Job.addArguments("-in_assemb=075.assembly.coverage") summary16Job.addArguments("-output=700.annotation.lca.summary") summary16Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver) summary16Job.addArguments("-type=lca") summary16Job.uses("075.assembly.coverage", link=Link.INPUT) summary16Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT) summary16Job.uses("650.aa.expand.lca", link=Link.INPUT) summary16Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT) summary16Job.uses("450.rna.expand.lca", link=Link.INPUT) summary16Job.uses("700.annotation.lca.summary", link=Link.OUTPUT, transfer=True) summary16Job.profile("globus", "maxwalltime", "30") dax.addJob(summary16Job) dax.depends(summary16Job, qcJob) dax.depends(summary16Job, cluster1Job) dax.depends(summary16Job, cluster2Job) dax.depends(summary16Job, annotatesims1Job) dax.depends(summary16Job, annotatesims2Job) # Annotate Summary Job (17) summary17Job = Job("wrapper-summary", node_label="wrapper-summary") summary17Job.addArguments("-job=1") summary17Job.addArguments("-in_expand=650.aa.expand.ontology") summary17Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid) summary17Job.addArguments("-in_assemb=075.assembly.coverage") summary17Job.addArguments("-output=700.annotation.ontology.summary") summary17Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver) summary17Job.addArguments("-type=ontology") summary17Job.uses("075.assembly.coverage", link=Link.INPUT) summary17Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT) summary17Job.uses("650.aa.expand.ontology", link=Link.INPUT) summary17Job.uses("700.annotation.ontology.summary", link=Link.OUTPUT, transfer=True) summary17Job.profile("globus", "maxwalltime", "30") dax.addJob(summary17Job) dax.depends(summary17Job, qcJob) dax.depends(summary17Job, cluster1Job) dax.depends(summary17Job, annotatesims1Job) # Annotate Summary Job (18) summary18Job = Job("wrapper-summary", node_label="wrapper-summary") summary18Job.addArguments("-job=1") summary18Job.addArguments("-in_expand=650.aa.expand.protein") summary18Job.addArguments("-in_expand=450.rna.expand.rna") summary18Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid) summary18Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid) summary18Job.addArguments("-in_assemb=075.assembly.coverage") summary18Job.addArguments("-output=700.annotation.source.stats") summary18Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver) summary18Job.addArguments("-type=source") summary18Job.uses("075.assembly.coverage", link=Link.INPUT) summary18Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT) summary18Job.uses("650.aa.expand.protein", link=Link.INPUT) summary18Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT) summary18Job.uses("450.rna.expand.rna", link=Link.INPUT) summary18Job.uses("700.annotation.source.stats", link=Link.OUTPUT, transfer=True) summary18Job.profile("globus", "maxwalltime", "30") 
dax.addJob(summary18Job) dax.depends(summary18Job, qcJob) dax.depends(summary18Job, cluster1Job) dax.depends(summary18Job, cluster2Job) dax.depends(summary18Job, annotatesims1Job) dax.depends(summary18Job, annotatesims2Job) # Write the DAX file dax.writeXMLFile(self.daxfile) # Generate the replica catalog self.generate_replica_catalog()
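# NOTE (sketch): summary jobs 13-18 above differ only in their expand inputs,
# output name, and -type flag. A helper along these lines could remove the
# duplication; the name _add_summary_job is hypothetical, and the extra
# inputs of jobs 13 (seq.index) and 17 (aa-only) would still need per-job
# handling.
def _add_summary_job(self, dax, out_name, summary_type, expands, parents):
    job = Job("wrapper-summary", node_label="wrapper-summary")
    job.addArguments("-job=1")
    for expand in expands:
        job.addArguments("-in_expand=%s" % expand)
        job.uses(expand, link=Link.INPUT)
    aa_map = "550.cluster.aa%s.mapping" % self.aa_pid
    job.addArguments("-in_maps=%s" % aa_map)
    job.uses(aa_map, link=Link.INPUT)
    job.addArguments("-in_assemb=075.assembly.coverage")
    job.uses("075.assembly.coverage", link=Link.INPUT)
    job.addArguments("-output=%s" % out_name)
    job.uses(out_name, link=Link.OUTPUT, transfer=True)
    job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
    job.addArguments("-type=%s" % summary_type)
    job.profile("globus", "maxwalltime", "30")
    dax.addJob(job)
    for parent in parents:
        dax.depends(job, parent)
    return job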
def add_surface_resampling_steps(self, dax, job_recon): t1_mgz = File(T1Files.T1_MGZ.value) lh_pial = File(T1Files.LH_PIAL.value) rh_pial = File(T1Files.RH_PIAL.value) pials = [lh_pial, rh_pial] lh_aparc_annot = File(T1Files.LH_APARC_ANNOT.value % self.atlas_suffix) rh_aparc_annot = File(T1Files.RH_APARC_ANNOT.value % self.atlas_suffix) aparcs = [lh_aparc_annot, rh_aparc_annot] lh_pial_resamp = File(ResamplingFiles.LH_PIAL_RESAMP.value % self.trg_subject) rh_pial_resamp = File(ResamplingFiles.RH_PIAL_RESAMP.value % self.trg_subject) pials_resamp = [lh_pial_resamp, rh_pial_resamp] lh_aparc_annot_resamp = File( ResamplingFiles.LH_APARC_ANNOT_RESAMP.value % (self.trg_subject, self.atlas_suffix)) rh_aparc_annot_resamp = File( ResamplingFiles.RH_APARC_ANNOT_RESAMP.value % (self.trg_subject, self.atlas_suffix)) aparcs_resamp = [lh_aparc_annot_resamp, rh_aparc_annot_resamp] last_job = None for idx, hemi in enumerate(["lh", "rh"]): job1 = Job(ResamplingJobNames.MRI_SURF2SURF.value) job1.addArguments(self.atlas_suffix, "--srcsubject", self.subject, "--trgsubject", self.trg_subject, "--hemi", hemi, "--sval-xyz", "pial", "--tval", "pial-%s" % self.trg_subject, "--tval-xyz", t1_mgz) job1.uses(t1_mgz, link=Link.INPUT) job1.uses(pials[idx], link=Link.INPUT) job1.uses(pials_resamp[idx], link=Link.OUTPUT, transfer=True, register=True) dax.addJob(job1) dax.depends(job1, job_recon) job2 = Job(ResamplingJobNames.MRI_SURF2SURF.value) job2.addArguments(self.atlas_suffix, "--srcsubject", self.subject, "--trgsubject", self.trg_subject, "--hemi", hemi, "--sval-annot", aparcs[idx], "--tval", aparcs_resamp[idx]) job2.uses(aparcs[idx], link=Link.INPUT) job2.uses(aparcs_resamp[idx], link=Link.OUTPUT, transfer=True, register=True) dax.addJob(job2) dax.depends(job2, job_recon) last_job = job2 lh_centered_pial = File(ResamplingFiles.LH_CENTERED_PIAL_RESAMP.value % self.trg_subject) job5 = Job(T1JobNames.MRIS_CONVERT.value) job5.addArguments("--to-scanner", lh_pial_resamp, lh_centered_pial) job5.uses(lh_pial_resamp, link=Link.INPUT) job5.uses(lh_centered_pial, link=Link.OUTPUT, transfer=True, register=True) dax.addJob(job5) dax.depends(job5, last_job) rh_centered_pial = File(ResamplingFiles.RH_CENTERED_PIAL_RESAMP.value % self.trg_subject) job6 = Job(T1JobNames.MRIS_CONVERT.value) job6.addArguments("--to-scanner", rh_pial_resamp, rh_centered_pial) job6.uses(rh_pial_resamp, link=Link.INPUT) job6.uses(rh_centered_pial, link=Link.OUTPUT, transfer=True, register=True) dax.addJob(job6) dax.depends(job6, last_job) t1_nii_gz = File(T1Files.T1_NII_GZ.value) self.qc_snapshots.add_vol_surf_snapshot_step( dax, [job5, job6], t1_nii_gz, [lh_centered_pial, rh_centered_pial]) self.qc_snapshots.add_surf_annot_snapshot_step(dax, [job5, job6], lh_centered_pial, lh_aparc_annot_resamp) self.qc_snapshots.add_surf_annot_snapshot_step(dax, [job5, job6], rh_centered_pial, rh_aparc_annot_resamp) return job6
def add_surf_annot_snapshot_step(self, dax, jobs_before, surf, annot): snapshot_file_1 = File("snapshot_surface_annotation_%d0.png" % self.SNAPSHOT_NUMBER) snapshot_file_2 = File("snapshot_surface_annotation_%d1.png" % self.SNAPSHOT_NUMBER) snapshot_file_3 = File("snapshot_surface_annotation_%d2.png" % self.SNAPSHOT_NUMBER) snapshot_file_4 = File("snapshot_surface_annotation_%d3.png" % self.SNAPSHOT_NUMBER) snapshot_file_5 = File("snapshot_surface_annotation_%d4.png" % self.SNAPSHOT_NUMBER) snapshot_file_6 = File("snapshot_surface_annotation_%d5.png" % self.SNAPSHOT_NUMBER) job = Job("qc_snapshot") job.addArguments(str(self.SNAPSHOT_NUMBER), "surf_annot", surf, annot) job.uses(surf, link=Link.INPUT) job.uses(annot, link=Link.INPUT) job.uses(snapshot_file_1, link=Link.OUTPUT, transfer=True, register=True) job.uses(snapshot_file_2, link=Link.OUTPUT, transfer=True, register=True) job.uses(snapshot_file_3, link=Link.OUTPUT, transfer=True, register=True) job.uses(snapshot_file_4, link=Link.OUTPUT, transfer=True, register=True) job.uses(snapshot_file_5, link=Link.OUTPUT, transfer=True, register=True) job.uses(snapshot_file_6, link=Link.OUTPUT, transfer=True, register=True) dax.addJob(job) for job_before in jobs_before: dax.depends(job, job_before) self.SNAPSHOT_NUMBER += 1
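# NOTE (sketch): the six snapshot outputs above differ only in a trailing
# index, so the File declarations and uses() calls could be generated in a
# loop with the same naming scheme:
#
#     snapshot_files = [
#         File("snapshot_surface_annotation_%d%d.png" % (self.SNAPSHOT_NUMBER, i))
#         for i in range(6)]
#     for snapshot_file in snapshot_files:
#         job.uses(snapshot_file, link=Link.OUTPUT, transfer=True, register=True)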
def run_python_on_parameters( self, job_name: Locator, python_module: Any, parameters: Union[Parameters, Dict[str, Any]], *, depends_on, resource_request: Optional[ResourceRequest] = None, override_conda_config: Optional[CondaConfiguration] = None, category: Optional[str] = None, ) -> DependencyNode: """ Schedule a job to run the given *python_module* on the given *parameters*. If this job requires other jobs to be executed first, include them in *depends_on*. This method returns a `DependencyNode` which can be used in *depends_on* for future jobs. """ job_dir = self.directory_for(job_name) ckpt_name = job_name / "___ckpt" checkpoint_path = job_dir / "___ckpt" depends_on = _canonicalize_depends_on(depends_on) if isinstance(python_module, str): fully_qualified_module_name = python_module else: fully_qualified_module_name = fully_qualified_name(python_module) # allow users to specify the parameters as a dict for convenience if not isinstance(parameters, Parameters): parameters = Parameters.from_mapping(parameters) # If we've already scheduled this identical job, # then don't schedule it again. params_sink = CharSink.to_string() YAMLParametersWriter().write(parameters, params_sink) signature = (fully_qualified_module_name, params_sink.last_string_written) if signature in self._signature_to_job: logging.info("Job %s recognized as a duplicate", job_name) return self._signature_to_job[signature] script_path = job_dir / "___run.sh" stdout_path = parameters.string( "logfile", default=str((job_dir / "___stdout.log").absolute())) self._conda_script_generator.write_shell_script_to( entry_point_name=fully_qualified_module_name, parameters=parameters, working_directory=job_dir, script_path=script_path, params_path=job_dir / "____params.params", stdout_file=stdout_path, ckpt_path=checkpoint_path, override_conda_config=override_conda_config, ) script_executable = Executable( namespace=self._namespace, name=str(job_name).replace("/", "_"), version="4.0", os="linux", arch="x86_64", ) script_executable.addPFN( path_to_pfn(script_path, site=self._default_site)) if not self._job_graph.hasExecutable(script_executable): self._job_graph.addExecutable(script_executable) job = Job(script_executable) self._job_graph.addJob(job) for parent_dependency in depends_on: if parent_dependency.job: self._job_graph.depends(job, parent_dependency.job) for out_file in parent_dependency.output_files: job.uses(out_file, link=Link.INPUT) if resource_request is not None: resource_request = self.default_resource_request.unify( resource_request) else: resource_request = self.default_resource_request if category: job.profile(Namespace.DAGMAN, "category", category) resource_request.apply_to_job(job, job_name=self._job_name_for(job_name)) # Handle Output Files # This is currently only handled as the checkpoint file # See: https://github.com/isi-vista/vista-pegasus-wrapper/issues/25 checkpoint_pegasus_file = path_to_pegasus_file(checkpoint_path, site=self._default_site, name=f"{ckpt_name}") if checkpoint_pegasus_file not in self._added_files: self._job_graph.addFile(checkpoint_pegasus_file) self._added_files.add(checkpoint_pegasus_file) # If the checkpoint file already exists, we want to add it to the replica catalog # so that we don't run the job corresponding to the checkpoint file again if checkpoint_path.exists(): with self._replica_catalog.open("a+") as handle: handle.write( f"{ckpt_name} file://{checkpoint_path} site={self._default_site}\n" ) job.uses(checkpoint_pegasus_file, link=Link.OUTPUT, transfer=True) dependency_node = 
DependencyNode.from_job( job, output_files=[checkpoint_pegasus_file]) self._signature_to_job[signature] = dependency_node logging.info("Scheduled Python job %s", job_name) return dependency_node
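# Example usage (illustrative only: the module paths, parameter keys, and
# Locator construction below are hypothetical, not part of the wrapper's
# documented API):
#
#     preprocess = builder.run_python_on_parameters(
#         Locator(["preprocess"]),
#         "my_project.preprocess",
#         {"input_dir": "/data/raw"},
#         depends_on=[])
#     train = builder.run_python_on_parameters(
#         Locator(["train"]),
#         "my_project.train",
#         {"training_data": "/data/processed"},
#         depends_on=[preprocess])  # chain via the returned DependencyNode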
def add_seeg_positions_computation_steps(self, dax): ct_input = File(Inputs.CT_INPUT.value) ct_ras = File(SEEGCompFiles.CT_RAS_NII_GZ.value) job1 = Job(T1JobNames.MRI_CONVERT.value) job1.addArguments(ct_input, ct_ras, "--out_orientation", "RAS") job1.uses(ct_input, Link.INPUT) job1.uses(ct_ras, Link.OUTPUT, register=True, transfer=True) dax.addJob(job1) t1_ras = File(T1Files.T1_NII_GZ.value) ct_in_t1 = File(SEEGCompFiles.CT_IN_T1_NII_GZ.value) ct_to_t1_mat = File(SEEGCompFiles.CT_TO_T1_MAT.value) job2 = Job(CoregJobNames.FLIRT.value) job2.addArguments(ct_ras, t1_ras, ct_to_t1_mat, ct_in_t1) job2.uses(t1_ras, Link.INPUT) job2.uses(ct_ras, Link.INPUT) job2.uses(ct_in_t1, Link.OUTPUT, transfer=True, register=True) job2.uses(ct_to_t1_mat, Link.OUTPUT, transfer=True, register=True) dax.addJob(job2) dax.depends(job2, job1) brain_mgz = File(T1Files.BRAIN_MGZ.value) brain_ras = File(SEEGCompFiles.BRAIN_RAS_NII_GZ.value) job3 = Job(T1JobNames.MRI_CONVERT.value) job3.addArguments(brain_mgz, brain_ras, "--out_orientation", "RAS") job3.uses(brain_mgz, Link.INPUT) job3.uses(brain_ras, Link.OUTPUT, transfer=True, register=True) dax.addJob(job3) brain_mask = File(SEEGCompFiles.BRAIN_MASK_NII_GZ.value) job4 = Job(SEEGCompJobNames.MRI_BINARIZE.value) job4.addArguments("--i", brain_ras, "--o", brain_mask, "--min", "10", "--erode", "4") job4.uses(brain_ras, Link.INPUT) job4.uses(brain_mask, Link.OUTPUT, transfer=True, register=True) dax.addJob(job4) dax.depends(job4, job3) masked_ct = File(SEEGCompFiles.MASKED_CT_NII_GZ.value) job5 = Job(SEEGCompJobNames.MRI_BINARIZE.value) job5.addArguments("--i", ct_in_t1, "--o", masked_ct, "--min", "1000", "--mask", brain_mask) job5.uses(ct_in_t1, Link.INPUT) job5.uses(brain_mask, Link.INPUT) job5.uses(masked_ct, Link.OUTPUT, transfer=True, register=True) dax.addJob(job5) dax.depends(job5, job2) dax.depends(job5, job4) dilated_ct = File(SEEGCompFiles.DILATED_CT_NII_GZ.value) job6 = Job(SEEGCompJobNames.MRI_BINARIZE.value) job6.addArguments("--i", masked_ct, "--o", dilated_ct, "--min", "0.5", "--dilate", "2", "--erode", "1") job6.uses(masked_ct, Link.INPUT) job6.uses(dilated_ct, Link.OUTPUT, transfer=True, register=True) dax.addJob(job6) dax.depends(job6, job5) labeled_ct = File(SEEGCompFiles.LABELED_CT_NII_GZ.value) job7 = Job(SEEGCompJobNames.LABEL_CT_WITH_DILATION.value) job7.addArguments(masked_ct, dilated_ct, labeled_ct, self.subj) job7.uses(masked_ct, Link.INPUT) job7.uses(dilated_ct, Link.INPUT) job7.uses(labeled_ct, Link.OUTPUT, transfer=True, register=True) dax.addJob(job7) dax.depends(job7, job6) schema_txt = File(Inputs.SCHEMA_TXT.value) job8 = Job(SEEGCompJobNames.GEN_SCHEMA_TXT.value) job8.addArguments(labeled_ct, schema_txt) job8.uses(labeled_ct, link=Link.INPUT) job8.uses(schema_txt, link=Link.OUTPUT, transfer=True, register=True) dax.addJob(job8) dax.depends(job8, job7) seeg_xyz = File(SEEGCompFiles.SEEG_XYZ.value) job9 = Job(SEEGCompJobNames.GEN_SEEG_XYZ.value) job9.addArguments(labeled_ct, schema_txt, seeg_xyz, self.subj) job9.uses(labeled_ct, Link.INPUT) job9.uses(schema_txt, Link.INPUT) job9.uses(seeg_xyz, Link.OUTPUT, transfer=True, register=True) dax.addJob(job9) dax.depends(job9, job7) dax.depends(job9, job8) return job9
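# NOTE (sketch): job1 and job3 above run the same mri_convert RAS
# conversion; a hypothetical helper could fold them (and the similar step in
# step_coregister_t1_dwi) into one place:
def _convert_to_ras(self, dax, in_file, out_file, parents=()):
    job = Job(T1JobNames.MRI_CONVERT.value)
    job.addArguments(in_file, out_file, "--out_orientation", "RAS")
    job.uses(in_file, Link.INPUT)
    job.uses(out_file, Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job)
    for parent in parents:
        dax.depends(job, parent)
    return job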
def add_conversion_steps(self, dax, job_aparc_aseg, job_mapping_details, job_weights, job_lengths):
    weights_csv = File(TractsGenFiles.TRACT_COUNTS.value % self.atlas_suffix)
    lengths_csv = File(TractsGenFiles.TRACT_LENGHTS.value % self.atlas_suffix)
    centers = File(AsegFiles.CENTERS_TXT.value % self.atlas_suffix)
    areas = File(AsegFiles.AREAS_TXT.value % self.atlas_suffix)
    orientations = File(AsegFiles.ORIENTATIONS_TXT.value % self.atlas_suffix)
    cortical = File(AsegFiles.CORTICAL_TXT.value % self.atlas_suffix)
    rm_to_aparc_aseg = File(AsegFiles.RM_TO_APARC_ASEG_TXT.value % self.atlas_suffix)
    # aparc_aseg = File(T1Files.APARC_ASEG_NII_GZ.value)

    job = Job("convert_output")
    job.addArguments(weights_csv, lengths_csv, self.atlas_suffix)
    job.uses(weights_csv, link=Link.INPUT)
    job.uses(lengths_csv, link=Link.INPUT)
    job.uses(centers, link=Link.INPUT)
    job.uses(areas, link=Link.INPUT)
    job.uses(orientations, link=Link.INPUT)
    job.uses(cortical, link=Link.INPUT)
    job.uses(rm_to_aparc_aseg, link=Link.INPUT)
    # job.uses(aparc_aseg, link=Link.INPUT)
    job.uses(File(OutputConvFiles.APARC_ASEG_COR_NII_GZ.value % self.atlas_suffix),
             link=Link.OUTPUT, transfer=True, register=False)
    job.uses(File(OutputConvFiles.CONNECTIVITY_ZIP.value % self.atlas_suffix),
             link=Link.OUTPUT, transfer=True, register=False)
    job.uses(File(T1Files.T1_NII_GZ.value), link=Link.INPUT)
    job.uses(File(T1Files.APARC_ASEG_NII_GZ.value % self.atlas_suffix), link=Link.INPUT)
    dax.addJob(job)

    dax.depends(job, job_aparc_aseg)
    dax.depends(job, job_mapping_details)
    dax.depends(job, job_weights)
    dax.depends(job, job_lengths)

    return job
def write_dax(self, filename='workflow.dax', name='workflow'):
    """Generate Pegasus abstract workflow (DAX).

    Parameters
    ----------
    filename : `str`
        File to write the DAX to.
    name : `str`, optional
        Name of the DAX.

    Returns
    -------
    `Pegasus.ADAG`
        Abstract workflow used by Pegasus' planner.

    Raises
    ------
    `AttributeError`
        If either task or file node is missing a mandatory attribute.
    """
    dax = ADAG(name)

    # Process file nodes.
    for file_id in self.files:
        attrs = self.graph.node[file_id]
        try:
            name = attrs['lfn']
        except KeyError:
            msg = 'Mandatory attribute "{}" is missing.'
            raise AttributeError(msg.format('lfn'))
        file_ = File(name)

        # Add physical file names, if any.
        urls = attrs.get('pfn')
        if urls is not None:
            urls = urls.split(',')
            sites = attrs.get('sites')
            if sites is None:
                sites = len(urls) * ['condorpool']
            for url, site in zip(urls, sites):
                file_.addPFN(PFN(url, site))
        self.catalog[attrs['lfn']] = file_

    # Add jobs to the DAX.
    for task_id in self.tasks:
        attrs = self.graph.node[task_id]
        try:
            name = attrs['exec_name']
        except KeyError:
            msg = 'Mandatory attribute "{}" is missing.'
            raise AttributeError(msg.format('exec_name'))
        label = '{name}_{id}'.format(name=name, id=task_id)
        job = Job(name, id=task_id, node_label=label)

        # Add job command line arguments replacing any file name with the
        # respective Pegasus file object.
        args = attrs.get('exec_args', [])
        if args:
            args = args.split()
            lfns = list(set(self.catalog) & set(args))
            if lfns:
                indices = [args.index(lfn) for lfn in lfns]
                for idx, lfn in zip(indices, lfns):
                    args[idx] = self.catalog[lfn]
            job.addArguments(*args)

        # Specify job's inputs.
        inputs = [file_id for file_id in self.graph.predecessors(task_id)]
        for file_id in inputs:
            attrs = self.graph.node[file_id]
            is_ignored = attrs.get('ignore', False)
            if not is_ignored:
                file_ = self.catalog[attrs['lfn']]
                job.uses(file_, link=Link.INPUT)

        # Specify job's outputs.
        outputs = [file_id for file_id in self.graph.successors(task_id)]
        for file_id in outputs:
            attrs = self.graph.node[file_id]
            is_ignored = attrs.get('ignore', False)
            if not is_ignored:
                file_ = self.catalog[attrs['lfn']]
                job.uses(file_, link=Link.OUTPUT)

                streams = attrs.get('streams')
                if streams is not None:
                    if streams & 1 != 0:
                        job.setStdout(file_)
                    if streams & 2 != 0:
                        job.setStderr(file_)

        # Provide default files to store stderr and stdout, if not
        # specified explicitly.
        if job.stderr is None:
            file_ = File('{name}.err'.format(name=label))
            job.uses(file_, link=Link.OUTPUT)
            job.setStderr(file_)
        if job.stdout is None:
            file_ = File('{name}.out'.format(name=label))
            job.uses(file_, link=Link.OUTPUT)
            job.setStdout(file_)

        dax.addJob(job)

    # Add job dependencies to the DAX.
    for task_id in self.tasks:
        parents = set()
        for file_id in self.graph.predecessors(task_id):
            parents.update(self.graph.predecessors(file_id))
        for parent_id in parents:
            dax.depends(parent=dax.getJob(parent_id), child=dax.getJob(task_id))

    # Finally, write down the workflow in DAX format.
    with open(filename, 'w') as f:
        dax.writeXML(f)
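# NOTE: the 'streams' node attribute is a bitmask -- bit 0 routes stdout to
# the output file, bit 1 routes stderr. For example (attributes illustrative):
#
#     attrs = {'lfn': 'task.log', 'streams': 3}  # 0b11 -> both streams
#     attrs['streams'] & 1 != 0                  # True: stdout -> task.log
#     attrs['streams'] & 2 != 0                  # True: stderr -> task.log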
def add_seeg_mrs_gain_computation_steps(self, dax, job_seeg_xyz, job_mapping_details): seeg_xyz = File(SEEGCompFiles.SEEG_XYZ.value) cort_surf = File(AsegFiles.SURF_CORT_ZIP.value) subcort_surf = File(AsegFiles.SURF_SUBCORT_ZIP.value) cort_rm = File(AsegFiles.RM_CORT_TXT.value % self.atlas_suffix) subcort_rm = File(AsegFiles.RM_SUBCORT_TXT.value % self.atlas_suffix) gain_mat = File(SeegGainFiles.SEEG_GAIN_MRS_MAT.value % self.atlas_suffix) job = Job(SeegGainJobNames.COMPUTE_SEEG_GAIN.value) job.addArguments(seeg_xyz, cort_surf, subcort_surf, cort_rm, subcort_rm, gain_mat, self.subject) job.uses(seeg_xyz, link=Link.INPUT) job.uses(cort_surf, link=Link.INPUT) job.uses(subcort_surf, link=Link.INPUT) job.uses(cort_rm, link=Link.INPUT) job.uses(subcort_rm, link=Link.INPUT) job.uses(gain_mat, link=Link.OUTPUT, transfer=True, register=True) dax.addJob(job) dax.depends(job, job_seeg_xyz) dax.depends(job, job_mapping_details)
def generate_dax(self): "Generate a workflow (DAX, config files, and replica catalog)" ts = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') dax = ADAG("refinement-%s" % ts) # These are all the global input files for the workflow coordinates = File(self.coordinates) parameters = File(self.parameters) extended_system = File(self.extended_system) topfile = File(self.topfile) sassena_db = File(self.sassena_db) incoherent_db = File(self.incoherent_db) coherent_db = File(self.coherent_db) # This job untars the sassena db and makes it available to the other # jobs in the workflow untarjob = Job("tar", node_label="untar") if self.is_synthetic_workflow: untarjob.addArguments("-p", "-xzvf", sassena_db.name) untarjob.addArguments("-a", "tar") for output_file in [ "incoherent_db", "coherent_db" ]: untarjob.addArguments(self.keg_params.output_file("tar", output_file, eval(output_file).name)) self.keg_params.add_keg_params(untarjob) else: untarjob.addArguments("-xzvf", sassena_db) untarjob.uses(sassena_db, link=Link.INPUT) untarjob.uses(incoherent_db, link=Link.OUTPUT, transfer=False) untarjob.uses(coherent_db, link=Link.OUTPUT, transfer=False) untarjob.profile("globus", "jobtype", "single") untarjob.profile("globus", "maxwalltime", "1") untarjob.profile("globus", "count", "1") dax.addJob(untarjob) # For each charge that was listed in the config file for charge in self.charges: structure = "Q%s.psf" % charge # Equilibrate files eq_conf = File("equilibrate_%s.conf" % charge) eq_coord = File("equilibrate_%s.restart.coord" % charge) eq_xsc = File("equilibrate_%s.restart.xsc" % charge) eq_vel = File("equilibrate_%s.restart.vel" % charge) # Production files prod_conf = File("production_%s.conf" % charge) prod_dcd = File("production_%s.dcd" % charge) # Ptraj files ptraj_conf = File("ptraj_%s.conf" % charge) ptraj_fit = File("ptraj_%s.fit" % charge) ptraj_dcd = File("ptraj_%s.dcd" % charge) # Sassena incoherent files incoherent_conf = File("sassenaInc_%s.xml" % charge) fqt_incoherent = File("fqt_inc_%s.hd5" % charge) # Sassena coherent files coherent_conf = File("sassenaCoh_%s.xml" % charge) fqt_coherent = File("fqt_coh_%s.hd5" % charge) # Generate psf and configuration files for this charge pipeline self.generate_psf(charge) self.generate_eq_conf(charge, structure) self.generate_prod_conf(charge, structure) self.generate_ptraj_conf(charge) self.generate_incoherent_conf(charge) self.generate_coherent_conf(charge) # Equilibrate job eqjob = Job("namd", node_label="namd_eq_%s" % charge) if self.is_synthetic_workflow: eqjob.addArguments("-p", eq_conf) eqjob.addArguments("-a", "namd_eq_%s" % charge) eqjob.addArguments("-i", eq_conf.name, structure, coordinates.name, parameters.name, extended_system.name) task_label = "namd-eq" for output_file in [ "eq_coord", "eq_xsc", "eq_vel" ]: eqjob.addArguments(self.keg_params.output_file(task_label, output_file, eval(output_file).name)) self.keg_params.add_keg_params(eqjob, task_label) else: eqjob.addArguments(eq_conf) eqjob.uses(eq_conf, link=Link.INPUT) eqjob.uses(structure, link=Link.INPUT) eqjob.uses(coordinates, link=Link.INPUT) eqjob.uses(parameters, link=Link.INPUT) eqjob.uses(extended_system, link=Link.INPUT) eqjob.uses(eq_coord, link=Link.OUTPUT, transfer=False) eqjob.uses(eq_xsc, link=Link.OUTPUT, transfer=False) eqjob.uses(eq_vel, link=Link.OUTPUT, transfer=False) if self.is_synthetic_workflow: eqjob.profile("globus", "jobtype", "mpi") eqjob.profile("globus", "maxwalltime", "1") eqjob.profile("globus", "count", "8") else: eqjob.profile("globus", "jobtype", "mpi") 
eqjob.profile("globus", "maxwalltime", self.getconf("equilibrate_maxwalltime")) eqjob.profile("globus", "count", self.getconf("equilibrate_cores")) dax.addJob(eqjob) # Production job prodjob = Job("namd", node_label="namd_prod_%s" % charge) if self.is_synthetic_workflow: prodjob.addArguments("-p", prod_conf) prodjob.addArguments("-a", "namd_prod_%s" % charge) prodjob.addArguments("-i", prod_conf.name, structure, coordinates.name, parameters.name, eq_coord.name, eq_xsc.name, eq_vel.name) task_label = "namd-prod" prodjob.addArguments(self.keg_params.output_file(task_label, "prod_dcd", prod_dcd.name)) self.keg_params.add_keg_params(prodjob, task_label) else: prodjob.addArguments(prod_conf) prodjob.uses(prod_conf, link=Link.INPUT) prodjob.uses(structure, link=Link.INPUT) prodjob.uses(coordinates, link=Link.INPUT) prodjob.uses(parameters, link=Link.INPUT) prodjob.uses(eq_coord, link=Link.INPUT) prodjob.uses(eq_xsc, link=Link.INPUT) prodjob.uses(eq_vel, link=Link.INPUT) prodjob.uses(prod_dcd, link=Link.OUTPUT, transfer=True) if self.is_synthetic_workflow: prodjob.profile("globus", "jobtype", "mpi") prodjob.profile("globus", "maxwalltime", "6") prodjob.profile("globus", "count", "8") else: prodjob.profile("globus", "jobtype", "mpi") prodjob.profile("globus", "maxwalltime", self.getconf("production_maxwalltime")) prodjob.profile("globus", "count", self.getconf("production_cores")) dax.addJob(prodjob) dax.depends(prodjob, eqjob) # ptraj job ptrajjob = Job(namespace="amber", name="ptraj", node_label="amber_ptraj_%s" % charge) if self.is_synthetic_workflow: ptrajjob.addArguments("-p", topfile) ptrajjob.addArguments("-a", "amber_ptraj_%s" % charge) ptrajjob.addArguments("-i", topfile.name, ptraj_conf.name, prod_dcd.name) task_label = "amber-ptraj" for output_file in [ "ptraj_fit", "ptraj_dcd" ]: ptrajjob.addArguments(self.keg_params.output_file(task_label, output_file, eval(output_file).name)) self.keg_params.add_keg_params(ptrajjob, task_label) else: ptrajjob.addArguments(topfile) ptrajjob.setStdin(ptraj_conf) ptrajjob.uses(topfile, link=Link.INPUT) ptrajjob.uses(ptraj_conf, link=Link.INPUT) ptrajjob.uses(prod_dcd, link=Link.INPUT) ptrajjob.uses(ptraj_fit, link=Link.OUTPUT, transfer=True) ptrajjob.uses(ptraj_dcd, link=Link.OUTPUT, transfer=True) ptrajjob.profile("globus", "jobtype", "single") ptrajjob.profile("globus", "maxwalltime", self.getconf("ptraj_maxwalltime")) ptrajjob.profile("globus", "count", self.getconf("ptraj_cores")) dax.addJob(ptrajjob) dax.depends(ptrajjob, prodjob) # sassena incoherent job incojob = Job("sassena", node_label="sassena_inc_%s" % charge) if self.is_synthetic_workflow: incojob.addArguments("-p", "--config", incoherent_conf) incojob.addArguments("-a", "sassena_inc_%s" % charge) incojob.addArguments("-i", incoherent_conf.name, ptraj_dcd.name, incoherent_db.name, coordinates.name) task_label = "sassena-inc" incojob.addArguments(self.keg_params.output_file(task_label, "fqt_incoherent", fqt_incoherent.name)) self.keg_params.add_keg_params(incojob, task_label) else: incojob.addArguments("--config", incoherent_conf) incojob.uses(incoherent_conf, link=Link.INPUT) incojob.uses(ptraj_dcd, link=Link.INPUT) incojob.uses(incoherent_db, link=Link.INPUT) incojob.uses(coordinates, link=Link.INPUT) incojob.uses(fqt_incoherent, link=Link.OUTPUT, transfer=True) if self.is_synthetic_workflow: incojob.profile("globus", "jobtype", "mpi") incojob.profile("globus", "maxwalltime", "6") incojob.profile("globus", "count", "8") else: incojob.profile("globus", "jobtype", "mpi") 
incojob.profile("globus", "maxwalltime", self.getconf("sassena_maxwalltime")) incojob.profile("globus", "count", self.getconf("sassena_cores")) dax.addJob(incojob) dax.depends(incojob, ptrajjob) dax.depends(incojob, untarjob) # sassena coherent job cojob = Job("sassena", node_label="sassena_coh_%s" % charge) if self.is_synthetic_workflow: cojob.addArguments("-p", "--config", coherent_conf) cojob.addArguments("-a", "sassena_coh_%s" % charge) cojob.addArguments("-i", coherent_conf.name, ptraj_dcd.name, coherent_db.name, coordinates.name) task_label = "sassena-coh" cojob.addArguments(self.keg_params.output_file(task_label, "fqt_coherent", fqt_coherent.name)) self.keg_params.add_keg_params(cojob, task_label) else: cojob.addArguments("--config", coherent_conf) cojob.uses(coherent_conf, link=Link.INPUT) cojob.uses(ptraj_dcd, link=Link.INPUT) cojob.uses(coherent_db, link=Link.INPUT) cojob.uses(coordinates, link=Link.INPUT) cojob.uses(fqt_coherent, link=Link.OUTPUT, transfer=True) if self.is_synthetic_workflow: cojob.profile("globus", "jobtype", "mpi") cojob.profile("globus", "maxwalltime", "6") cojob.profile("globus", "count", "8") else: cojob.profile("globus", "jobtype", "mpi") cojob.profile("globus", "maxwalltime", self.getconf("sassena_maxwalltime")) cojob.profile("globus", "count", self.getconf("sassena_cores")) dax.addJob(cojob) dax.depends(cojob, prodjob) dax.depends(cojob, untarjob) # Write the DAX file dax.writeXMLFile(self.daxfile)
def add_head_model_steps(self, dax, job_bem_surfaces):
    brain_surface = File(HeadModelFiles.BRAIN_SURFACE.value % self.subject)
    inner_skull_surface = File(HeadModelFiles.INNER_SKULL_SURFACE.value % self.subject)
    outer_skin_surface = File(HeadModelFiles.OUTER_SKIN_SURFACE.value % self.subject)
    outer_skull_surface = File(HeadModelFiles.OUTER_SKULL_SURFACE.value % self.subject)
    bem_surfs = [brain_surface, inner_skull_surface, outer_skin_surface, outer_skull_surface]

    brain_surface_low = File(HeadModelFiles.BRAIN_SURFACE_LOW.value % self.subject)
    inner_skull_surface_low = File(HeadModelFiles.INNER_SKULL_SURFACE_LOW.value % self.subject)
    outer_skin_surface_low = File(HeadModelFiles.OUTER_SKIN_SURFACE_LOW.value % self.subject)
    outer_skull_surface_low = File(HeadModelFiles.OUTER_SKULL_SURFACE_LOW.value % self.subject)
    bem_surfs_low = [brain_surface_low, inner_skull_surface_low, outer_skin_surface_low,
                     outer_skull_surface_low]

    last_job = job_bem_surfaces
    for i, surf in enumerate(bem_surfs):
        job_resamp = Job(ResamplingJobNames.MRIS_DECIMATE.value)
        job_resamp.addArguments("-d", "0.1", surf, bem_surfs_low[i])
        job_resamp.uses(surf, link=Link.INPUT)
        job_resamp.uses(bem_surfs_low[i], link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job_resamp)
        dax.depends(job_resamp, job_bem_surfaces)
        last_job = job_resamp

    brain_surface_tri = File(HeadModelFiles.BRAIN_SURFACE_LOW_TRI.value % self.subject)
    inner_skull_surface_tri = File(HeadModelFiles.INNER_SKULL_SURFACE_LOW_TRI.value % self.subject)
    outer_skin_surface_tri = File(HeadModelFiles.OUTER_SKIN_SURFACE_LOW_TRI.value % self.subject)
    outer_skull_surface_tri = File(HeadModelFiles.OUTER_SKULL_SURFACE_LOW_TRI.value % self.subject)
    # Keep the same tissue order as bem_surfs_low, so each decimated surface
    # is converted into its matching BrainVisa .tri file.
    bem_tri_surfs = [brain_surface_tri, inner_skull_surface_tri, outer_skin_surface_tri,
                     outer_skull_surface_tri]

    for idx, surf in enumerate(bem_surfs_low):
        tri_file = bem_tri_surfs[idx]
        job2 = Job(HeadModelJobNames.CONVERT_TO_BRAIN_VISA.value)
        job2.addArguments(surf, tri_file, self.subject)
        job2.uses(surf, link=Link.INPUT)
        job2.uses(tri_file, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job2)
        dax.depends(job2, last_job)
        last_job = job2

    head_model_geom = File(HeadModelFiles.HEAD_MODEL_GEOM.value)
    head_model_cond = File(HeadModelFiles.HEAD_MODEL_COND.value)
    job4 = Job(HeadModelJobNames.GEN_HEAD_MODEL.value)
    # DAX3 addArguments only accepts strings and File/Executable objects,
    # so the boolean flag is passed as a string.
    job4.addArguments(self.subject, "True")
    for surf in bem_tri_surfs:
        job4.uses(surf, link=Link.INPUT)
    job4.uses(head_model_geom, link=Link.OUTPUT, transfer=True, register=True)
    job4.uses(head_model_cond, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job4)
    dax.depends(job4, last_job)

    head_matrix = File(HeadModelFiles.HEAD_MAT.value)
    job5 = Job(HeadModelJobNames.OM_ASSEMBLE.value)
    job5.addArguments("-HM", head_model_geom, head_model_cond, head_matrix)
    for surf in bem_tri_surfs:
        job5.uses(surf, link=Link.INPUT)
    job5.uses(head_model_geom, link=Link.INPUT)
    job5.uses(head_model_cond, link=Link.INPUT)
    job5.uses(head_matrix, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job5)
    dax.depends(job5, job4)

    head_inv_matrix = File(HeadModelFiles.HEAD_INV_MAT.value)
    job6 = Job(HeadModelJobNames.OM_MINVERSER.value)
    job6.addArguments(head_matrix, head_inv_matrix)
    job6.uses(head_matrix, link=Link.INPUT)
    job6.uses(head_inv_matrix, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job6)
    dax.depends(job6, job5)
    return job6
def _add_flirt_steps(self, dax, job_b0, job_t1, job_aparc_aseg): b0_nii_gz = File(DWIFiles.B0_NII_GZ.value) t1_nii_gz = File(T1Files.T1_NII_GZ.value) d2t_mat = File(CoregFiles.D2T_MAT.value) b0_in_t1 = File(CoregFiles.B0_IN_T1.value) job1 = Job(CoregJobNames.FLIRT.value, node_label="Register DWI to T1") job1.addArguments(b0_nii_gz, t1_nii_gz, d2t_mat, b0_in_t1) job1.uses(b0_nii_gz, link=Link.INPUT) job1.uses(t1_nii_gz, link=Link.INPUT) job1.uses(d2t_mat, link=Link.OUTPUT, transfer=True, register=True) job1.uses(b0_in_t1, link=Link.OUTPUT, transfer=True, register=True) dax.addJob(job1) dax.depends(job1, job_t1) dax.depends(job1, job_b0) self.qc_snapshots.add_2vols_snapshot_step(dax, [job1], t1_nii_gz, b0_in_t1) t2d_mat = File(CoregFiles.T2D_MAT.value) job2 = Job(CoregJobNames.CONVERT_XFM.value, node_label="Convert d2t matrix to t2d matrix") job2.addArguments("-omat", t2d_mat, "-inverse", d2t_mat) job2.uses(d2t_mat, link=Link.INPUT) job2.uses(t2d_mat, link=Link.OUTPUT, transfer=False, register=False) dax.addJob(job2) dax.depends(job2, job1) t1_in_d_nii_gz = File(CoregFiles.T1_IN_D.value) job3 = Job(CoregJobNames.FLIRT_REVERSED.value, node_label="Register T1 to DWI") job3.addArguments(t1_nii_gz, b0_nii_gz, t1_in_d_nii_gz, t2d_mat) job3.uses(t1_nii_gz, link=Link.INPUT) job3.uses(b0_nii_gz, link=Link.INPUT) job3.uses(t2d_mat, link=Link.INPUT) job3.uses(t1_in_d_nii_gz, link=Link.OUTPUT, transfer=False, register=False) dax.addJob(job3) dax.depends(job3, job2) self.qc_snapshots.add_2vols_snapshot_step(dax, [job3], t1_in_d_nii_gz, b0_nii_gz) aparc_aseg_nii_gz = File(T1Files.APARC_ASEG_NII_GZ.value % self.atlas_suffix) aparc_aseg_in_d_nii_gz = File(CoregFiles.APARC_ASEG_IN_D.value % self.atlas_suffix) job4 = Job(CoregJobNames.FLIRT_REVERSED.value, node_label="Register APARC+ASEG to DWI") job4.addArguments(aparc_aseg_nii_gz, b0_nii_gz, aparc_aseg_in_d_nii_gz, t2d_mat) job4.uses(aparc_aseg_nii_gz, link=Link.INPUT) job4.uses(b0_nii_gz, link=Link.INPUT) job4.uses(t2d_mat, link=Link.INPUT) job4.uses(aparc_aseg_in_d_nii_gz, link=Link.OUTPUT, transfer=False, register=False) dax.addJob(job4) dax.depends(job4, job2) dax.depends(job4, job_aparc_aseg) self.qc_snapshots.add_2vols_snapshot_step(dax, [job4], aparc_aseg_in_d_nii_gz, b0_nii_gz) self.qc_snapshots.add_3vols_snapshot_step(dax, [job3, job4], t1_in_d_nii_gz, b0_nii_gz, aparc_aseg_in_d_nii_gz) return job3, job4
def _annotate_gtf(self, read_length): annotate_gtf = Job(name='annotate_gtf') annotate_gtf.invoke('all', self._state_update % 'Generating annotation FASTA files') prefix = self._get_index_hash(read_length) # Inputs gtf = File('%s.gtf' % self._species.name) chromosomes = self._species.chromosomes for i in chromosomes: chr_i = File('%s/chr%s.fa' % (self._species.name, i)) # Uses annotate_gtf.uses(chr_i, link=Link.INPUT) # Outputs features = File('h%s/FEATURES.fa' % prefix) chrs = File('h%s/GENOME.fa' % prefix) splices = File('h%s/SPLICES.fa' % prefix) genes = File('h%s/GENE.fa' % prefix) # Arguments annotate_gtf.addArguments(gtf, '-c', self._species.name, '-p h%s/' % prefix, '-l %d' % read_length) # Uses annotate_gtf.uses(gtf, link=Link.INPUT) annotate_gtf.uses(features, link=Link.OUTPUT, transfer=False, register=False) annotate_gtf.uses(chrs, link=Link.OUTPUT, transfer=False, register=False) annotate_gtf.uses(splices, link=Link.OUTPUT, transfer=False, register=False) annotate_gtf.uses(genes, link=Link.OUTPUT, transfer=False, register=False) self.adag.addJob(annotate_gtf)
def add_t1_processing_steps(self, dax, resamp_flag): t1_input = Inputs.T1_INPUT.value t1_converted = T1Files.T1_INPUT_CONVERTED.value t1_output, job1 = self._ensure_input_format(self.t1_format, t1_input, t1_converted, dax) aparc_aseg_mgz_vol = File(T1Files.APARC_ASEG_MGZ.value % self.atlas_suffix) lh_pial = File(T1Files.LH_PIAL.value) rh_pial = File(T1Files.RH_PIAL.value) lh_white = File(T1Files.LH_WHITE.value) rh_white = File(T1Files.RH_WHITE.value) lh_aparc_annot = File(T1Files.LH_APARC_ANNOT.value % self.atlas_suffix) rh_aparc_annot = File(T1Files.RH_APARC_ANNOT.value % self.atlas_suffix) out_files_list = [ aparc_aseg_mgz_vol, lh_pial, rh_pial, lh_white, rh_white, lh_aparc_annot, rh_aparc_annot ] t1_mgz_output = File(T1Files.T1_MGZ.value) norm_mgz_vol = File(T1Files.NORM_MGZ.value) brain_mgz_vol = File(T1Files.BRAIN_MGZ.value) job2 = Job(T1JobNames.RECON_ALL.value, node_label="Recon-all for T1") job2.addArguments(self.subject, t1_output, self.openmp_threads, self.atlas_suffix) job2.uses(t1_output, link=Link.INPUT) job2.uses(t1_mgz_output, link=Link.OUTPUT, transfer=True, register=True) job2.uses(norm_mgz_vol, link=Link.OUTPUT, transfer=True, register=True) job2.uses(brain_mgz_vol, link=Link.OUTPUT, transfer=True, register=True) if self.t2_flag != "True": self._add_output_files(job2, out_files_list) dax.addJob(job2) if job1 is not None: dax.depends(job2, job1) last_job = job2 if self.t2_flag == "True": t2_in = Inputs.T2_INPUT.value t2_converted = T1Files.T2_CONVERTED.value t2_input, job_convert = self._ensure_input_format( self.t2_format, t2_in, t2_converted, dax) job = Job(T1JobNames.AUTORECON3_T2.value) job.addArguments(self.subject, t2_input, self.openmp_threads) job.uses(t2_input, link=Link.INPUT) self._add_output_files(job, out_files_list) dax.addJob(job) if job_convert is not None: dax.depends(job, job_convert) dax.depends(job, last_job) last_job = job if self.flair_flag == "True": flair_in = Inputs.FLAIR_INPUT.value flair_converted = T1Files.FLAIR_CONVERTED.value flair_input, job_convert = self._ensure_input_format( self.flair_format, flair_in, flair_converted, dax) job = Job(T1JobNames.AUTORECON3_FLAIR.value) job.addArguments(self.subject, flair_input, self.openmp_threads) job.uses(flair_input, link=Link.INPUT) self._add_output_files(job, out_files_list) dax.addJob(job) if job_convert is not None: dax.depends(job, job_convert) dax.depends(job, last_job) last_job = job t1_nii_gz_vol = File(T1Files.T1_NII_GZ.value) job3 = Job(T1JobNames.MRI_CONVERT.value, node_label="Convert T1 to NIFTI with good orientation") job3.addArguments(t1_mgz_output, t1_nii_gz_vol, "--out_orientation", "RAS") job3.uses(t1_mgz_output, link=Link.INPUT) job3.uses(t1_nii_gz_vol, link=Link.OUTPUT, transfer=True, register=True) dax.addJob(job3) dax.depends(job3, last_job) aparc_aseg_nii_gz_vol = File(T1Files.APARC_ASEG_NII_GZ.value % self.atlas_suffix) job4 = Job( T1JobNames.MRI_CONVERT.value, node_label="Convert APARC+ASEG to NIFTI with good orientation") job4.addArguments(aparc_aseg_mgz_vol, aparc_aseg_nii_gz_vol, "--out_orientation", "RAS", "-rt", "nearest") job4.uses(aparc_aseg_mgz_vol, link=Link.INPUT) job4.uses(aparc_aseg_nii_gz_vol, link=Link.OUTPUT, transfer=False, register=False) dax.addJob(job4) dax.depends(job4, last_job) if resamp_flag != "True": lh_centered_pial = File(T1Files.LH_CENTERED_PIAL.value) job5 = Job(T1JobNames.MRIS_CONVERT.value) job5.addArguments("--to-scanner", lh_pial, lh_centered_pial) job5.uses(lh_pial, link=Link.INPUT) job5.uses(lh_centered_pial, link=Link.OUTPUT, transfer=False, 
register=False) dax.addJob(job5) dax.depends(job5, last_job) rh_centered_pial = File(T1Files.RH_CENTERED_PIAL.value) job6 = Job(T1JobNames.MRIS_CONVERT.value) job6.addArguments("--to-scanner", rh_pial, rh_centered_pial) job6.uses(rh_pial, link=Link.INPUT) job6.uses(rh_centered_pial, link=Link.OUTPUT, transfer=False, register=False) dax.addJob(job6) dax.depends(job6, last_job) self.qc_snapshots.add_vol_surf_snapshot_step( dax, [job3, job5, job6], t1_nii_gz_vol, [lh_centered_pial, rh_centered_pial]) self.qc_snapshots.add_surf_annot_snapshot_step( dax, [last_job, job5, job6], lh_centered_pial, lh_aparc_annot) self.qc_snapshots.add_surf_annot_snapshot_step( dax, [last_job, job5, job6], rh_centered_pial, rh_aparc_annot) return job3, job4
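# NOTE (sketch): the t2_flag/flair_flag/resamp_flag checks above compare
# against the literal string "True". A small hypothetical helper would make
# that contract explicit and tolerant of case/whitespace:
def _flag_is_true(flag):
    return str(flag).strip().lower() == "true"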
Profile(namespace="pegasus", key="clusters.size", value=config.get('all', 'clusters_size'))) sleep.addProfile( Profile(namespace="pegasus", key="clusters.maxruntime", value=config.get('all', 'clusters_maxruntime'))) cluster.addExecutable(sleep) for i in range(4): job = Job(namespace="cluster", name="level1", version="1.0") job.addArguments('-a level1 -T ' + str(i + 1)) job.addArguments('-i', a) job.addProfile( Profile(namespace="pegasus", key="job.runtime", value=str(i + 1))) job.uses(a, link=Link.INPUT) cluster.addJob(job) for j in range(4): child = Job(namespace="cluster", name="level2", version="1.0") child.addArguments('-a level2 -T ' + str((j + 1) * 2)) child.addProfile( Profile(namespace="pegasus", key="runtime", value=str( (j + 1) * 2))) cluster.addJob(child) cluster.depends(parent=job, child=child) # Write the DAX to standard out cluster.writeXML(sys.stdout)
def step_coregister_t1_dwi(main_dax, config, relevant_t1_job, relevant_dwi_job): LOGGER.info("FLIRT co-registration of T1 with DWI...") job1 = Job(name="mri_convert", node_label="Convert T1 to NIFTI with good orientation") job1.addArguments(config.mri.t1_mgz_file, config.mri.t1_nii_file, "--out_orientation", "RAS", "-rt", "nearest") job1.uses(config.mri.t1_mgz_file, link=Link.INPUT) job1.uses(config.mri.t1_nii_file, link=Link.OUTPUT, transfer=True, register=False) job2 = Job(name="flirt", node_label="Register DWI to T1 and get the relevant transform") job2.addArguments("-in", config.diffusion.b0, "-ref", config.mri.t1_nii_file, "-omat", config.mri.d2t_file, "-out", config.mri.b0_in_t1_file, "-dof", "12", "-searchrx", "-180", "180", "-searchry", "-180", "180", "-searchrz", "-180", "180", "-cost", "mutualinfo") job2.uses(config.diffusion.b0, link=Link.INPUT) job2.uses(config.mri.t1_nii_file, link=Link.INPUT) job2.uses(config.mri.d2t_file, link=Link.OUTPUT, transfer=True, register=False) job2.uses(config.mri.b0_in_t1_file, link=Link.OUTPUT, transfer=True, register=False) job3 = Job(name="convert_xfm", node_label="Generate inverse transform from T1 to DWI") job3.addArguments("-omat", config.mri.t2d_file, "-inverse", config.mri.d2t_file) job3.uses(config.mri.d2t_file, link=Link.INPUT) job3.uses(config.mri.t2d_file, link=Link.OUTPUT, transfer=True, register=False) job4 = Job(name="flirt", node_label="Apply inverse transform from T1 to DWI") job4.addArguments("-applyxfm", "-in", config.mri.t1_nii_file, "-ref", config.diffusion.b0, "-out", config.mri.t1_in_d_file, "-init", config.mri.t2d_file, "-interp", "nearestneighbour") job4.uses(config.mri.t1_nii_file, link=Link.INPUT) job4.uses(config.diffusion.b0, link=Link.INPUT) job4.uses(config.mri.t2d_file, link=Link.INPUT) job4.uses(config.mri.t1_in_d_file, link=Link.OUTPUT, transfer=True, register=False) main_dax.addJob(job1) main_dax.addJob(job2) main_dax.addJob(job3) main_dax.addJob(job4) main_dax.depends(job1, relevant_t1_job) main_dax.depends(job2, relevant_dwi_job) main_dax.depends(job2, job1) main_dax.depends(job3, job2) main_dax.depends(job4, job3) LOGGER.debug("FLIRT co-registration of T1 with DWI steps added.")
def add_lead_field_model_steps(self, dax, job_sensor_model_lh, job_sensor_model_rh): head_inv_matrix = File(HeadModelFiles.HEAD_INV_MAT.value) head2ipm_file = File(SensorModelFiles.SEEG_H2IPM.value) lh_white_dsm = File(SourceModelFiles.LH_WHITE_RESAMP_DSM.value % (self.trg_subject, self.atlas_suffix)) lh_ds2ipm_file = File(SensorModelFiles.LH_DS2IPM.value % (self.trg_subject, self.atlas_suffix)) lh_cortical_gain = File(LeadFieldModelFiles.LH_CORT_GAIN_H5.value % self.atlas_suffix) job1 = Job(LeadFieldModelJobNames.OM_GAIN.value) job1.addArguments("-InternalPotential", head_inv_matrix, lh_white_dsm, head2ipm_file, lh_ds2ipm_file, lh_cortical_gain) job1.uses(head_inv_matrix, link=Link.INPUT) job1.uses(lh_white_dsm, link=Link.INPUT) job1.uses(head2ipm_file, link=Link.INPUT) job1.uses(lh_ds2ipm_file, link=Link.INPUT) job1.uses(lh_cortical_gain, link=Link.OUTPUT, transfer=True, register=True) dax.addJob(job1) dax.depends(job1, job_sensor_model_lh) rh_white_dsm = File(SourceModelFiles.RH_WHITE_RESAMP_DSM.value % (self.trg_subject, self.atlas_suffix)) rh_ds2ipm_file = File(SensorModelFiles.RH_DS2IPM.value % (self.trg_subject, self.atlas_suffix)) rh_cortical_gain = File(LeadFieldModelFiles.RH_CORT_GAIN_H5.value % self.atlas_suffix) job2 = Job(LeadFieldModelJobNames.OM_GAIN.value) job2.addArguments("-InternalPotential", head_inv_matrix, rh_white_dsm, head2ipm_file, rh_ds2ipm_file, rh_cortical_gain) job2.uses(head_inv_matrix, link=Link.INPUT) job2.uses(rh_white_dsm, link=Link.INPUT) job2.uses(head2ipm_file, link=Link.INPUT) job2.uses(rh_ds2ipm_file, link=Link.INPUT) job2.uses(rh_cortical_gain, link=Link.OUTPUT, transfer=True, register=True) dax.addJob(job2) dax.depends(job2, job_sensor_model_rh)
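# NOTE (sketch): the two OM_GAIN jobs above are hemisphere mirror images and
# could be driven by a loop over (dsm, ds2ipm, gain, parent) tuples built
# from the same files used above:
#
#     for dsm, ds2ipm, gain, parent in [
#             (lh_white_dsm, lh_ds2ipm_file, lh_cortical_gain, job_sensor_model_lh),
#             (rh_white_dsm, rh_ds2ipm_file, rh_cortical_gain, job_sensor_model_rh)]:
#         job = Job(LeadFieldModelJobNames.OM_GAIN.value)
#         job.addArguments("-InternalPotential", head_inv_matrix, dsm,
#                          head2ipm_file, ds2ipm, gain)
#         for f in (head_inv_matrix, dsm, head2ipm_file, ds2ipm):
#             job.uses(f, link=Link.INPUT)
#         job.uses(gain, link=Link.OUTPUT, transfer=True, register=True)
#         dax.addJob(job)
#         dax.depends(job, parent)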
def add_mapping_details_computation_step(self, dax, job_lh_aseg, job_rh_aseg, job_resampling=None): # Input files: fs_lut = File(Inputs.FS_LUT.value) t1_file = File(T1Files.T1_NII_GZ.value) lh_aseg_annot = File(AsegFiles.LH_ASEG_ANNOT.value) rh_aseg_annot = File(AsegFiles.RH_ASEG_ANNOT.value) lh_subcort = File(AsegFiles.LH_CENTERED_ASEG.value) rh_subcort = File(AsegFiles.RH_CENTERED_ASEG.value) if job_resampling is None: lh_cortical_file = File(T1Files.LH_CENTERED_PIAL.value) rh_cortical_file = File(T1Files.RH_CENTERED_PIAL.value) lh_cortical_annot_file = File(T1Files.LH_APARC_ANNOT.value % self.atlas_suffix) rh_cortical_annot_file = File(T1Files.RH_APARC_ANNOT.value % self.atlas_suffix) else: lh_cortical_file = File(ResamplingFiles.LH_CENTERED_PIAL_RESAMP.value % self.trg_subj) rh_cortical_file = File(ResamplingFiles.RH_CENTERED_PIAL_RESAMP.value % self.trg_subj) lh_cortical_annot_file = File( ResamplingFiles.LH_APARC_ANNOT_RESAMP.value % (self.trg_subj, self.atlas_suffix)) rh_cortical_annot_file = File( ResamplingFiles.RH_APARC_ANNOT_RESAMP.value % (self.trg_subj, self.atlas_suffix)) # Output files: fs_custom = File(AsegFiles.FS_CUSTOM_TXT.value % self.atlas_suffix) centers = File(AsegFiles.CENTERS_TXT.value % self.atlas_suffix) areas = File(AsegFiles.AREAS_TXT.value % self.atlas_suffix) orientations = File(AsegFiles.ORIENTATIONS_TXT.value % self.atlas_suffix) cortical = File(AsegFiles.CORTICAL_TXT.value % self.atlas_suffix) rm_to_aparc_aseg = File(AsegFiles.RM_TO_APARC_ASEG_TXT.value % self.atlas_suffix) cort_region_mapping = File(AsegFiles.RM_CORT_TXT.value % self.atlas_suffix) subcort_region_mapping = File(AsegFiles.RM_SUBCORT_TXT.value % self.atlas_suffix) cort_surface = File(AsegFiles.SURF_CORT_ZIP.value) subcort_surface = File(AsegFiles.SURF_SUBCORT_ZIP.value) lh_dipoles = File(AsegFiles.LH_DIPOLES_TXT.value % self.atlas_suffix) rh_dipoles = File(AsegFiles.RH_DIPOLES_TXT.value % self.atlas_suffix) # Job config: job9 = Job(AsegGenJobNames.GEN_MAPPING_DETAILS.value) job9.addArguments(self.atlas_suffix, fs_lut, t1_file, lh_cortical_file, rh_cortical_file, lh_cortical_annot_file, rh_cortical_annot_file, lh_subcort, rh_subcort, lh_aseg_annot, rh_aseg_annot) job9.uses(fs_lut, link=Link.INPUT) job9.uses(t1_file, link=Link.INPUT) job9.uses(lh_cortical_annot_file, link=Link.INPUT) job9.uses(rh_cortical_annot_file, link=Link.INPUT) job9.uses(lh_cortical_file, link=Link.INPUT) job9.uses(rh_cortical_file, link=Link.INPUT) job9.uses(lh_aseg_annot, link=Link.INPUT) job9.uses(rh_aseg_annot, link=Link.INPUT) job9.uses(lh_subcort, link=Link.INPUT) job9.uses(rh_subcort, link=Link.INPUT) job9.uses(fs_custom, link=Link.OUTPUT, transfer=True, register=True) job9.uses(centers, link=Link.OUTPUT, transfer=True, register=True) job9.uses(areas, link=Link.OUTPUT, transfer=True, register=True) job9.uses(orientations, link=Link.OUTPUT, transfer=True, register=True) job9.uses(cortical, link=Link.OUTPUT, transfer=True, register=True) job9.uses(rm_to_aparc_aseg, link=Link.OUTPUT, transfer=True, register=True) job9.uses(cort_region_mapping, link=Link.OUTPUT, transfer=True, register=True) job9.uses(subcort_region_mapping, link=Link.OUTPUT, transfer=True, register=True) job9.uses(cort_surface, link=Link.OUTPUT, transfer=True, register=True) job9.uses(subcort_surface, link=Link.OUTPUT, transfer=True, register=True) job9.uses(lh_dipoles, link=Link.OUTPUT, transfer=True, register=True) job9.uses(rh_dipoles, link=Link.OUTPUT, transfer=True, register=True) dax.addJob(job9) dax.depends(job9, job_lh_aseg) dax.depends(job9, 
job_rh_aseg) if job_resampling is not None: dax.depends(job9, job_resampling) return job9
def add_tracts_generation_steps(self, dax, job_t1_in_d, job_mask, job_aparc_aseg_in_d, job_fs_custom):
    t1_in_d = File(CoregFiles.T1_IN_D.value)
    file_5tt = File(TractsGenFiles.FILE_5TT_MIF.value)
    job1 = Job(TractsGenJobNames.JOB_5TTGEN.value, node_label="Generate 5tt MIF")
    job1.addArguments(t1_in_d, file_5tt)
    job1.uses(t1_in_d, link=Link.INPUT)
    job1.uses(file_5tt, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job1)
    dax.depends(job1, job_t1_in_d)

    file_gmwmi = File(TractsGenFiles.GMWMI_MIF.value)
    job2 = Job(TractsGenJobNames.JOB_5TT2GMWMI.value, node_label="Extract GMWMI")
    job2.addArguments(file_5tt, file_gmwmi, "-nthreads", self.mrtrix_threads)
    job2.uses(file_5tt, link=Link.INPUT)
    job2.uses(file_gmwmi, link=Link.OUTPUT, transfer=False, register=False)
    dax.addJob(job2)
    dax.depends(job2, job1)

    file_gmwmi_nii_gz = File(TractsGenFiles.GMWMI_NII_GZ.value)
    job_gmwmi_convert = Job(DWIJobNames.MRCONVERT.value)
    job_gmwmi_convert.addArguments(file_gmwmi, file_gmwmi_nii_gz)
    job_gmwmi_convert.uses(file_gmwmi, link=Link.INPUT)
    job_gmwmi_convert.uses(file_gmwmi_nii_gz, link=Link.OUTPUT, transfer=False, register=False)
    dax.addJob(job_gmwmi_convert)
    dax.depends(job_gmwmi_convert, job2)

    self.qc_snapshots.add_2vols_snapshot_step(dax, [job_gmwmi_convert], t1_in_d, file_gmwmi_nii_gz)

    file_5ttvis = File(TractsGenFiles.FILE_5TTVIS_MIF.value)
    job3 = Job(TractsGenJobNames.JOB_5TT2VIS.value, node_label="Generate TT2VIS MIF")
    job3.addArguments(file_5tt, file_5ttvis)
    job3.uses(file_5tt, link=Link.INPUT)
    job3.uses(file_5ttvis, link=Link.OUTPUT, transfer=False, register=False)
    dax.addJob(job3)
    dax.depends(job3, job2)

    file_wm_fod = None
    last_job = None

    dwi_mif = File(DWIFiles.DWI_MIF.value)
    mask_mif = File(DWIFiles.MASK_MIF.value)
    if self.dwi_multi_shell == "True":
        file_RF_WM = File(TractsGenFiles.RF_WM.value)
        file_RF_GM = File(TractsGenFiles.RF_GM.value)
        file_RF_CSF = File(TractsGenFiles.RF_CSF.value)
        file_RF_voxels = File(TractsGenFiles.RF_VOXELS.value)

        job4 = Job(TractsGenJobNames.DWI2RESPONSE_MSMT.value)
        job4.addArguments(dwi_mif, file_5tt, file_RF_WM, file_RF_GM, file_RF_CSF, file_RF_voxels,
                          self.mrtrix_threads)
        job4.uses(dwi_mif, link=Link.INPUT)
        job4.uses(file_5tt, link=Link.INPUT)
        job4.uses(file_RF_WM, link=Link.OUTPUT, transfer=True, register=False)
        job4.uses(file_RF_GM, link=Link.OUTPUT, transfer=True, register=False)
        job4.uses(file_RF_CSF, link=Link.OUTPUT, transfer=True, register=False)
        job4.uses(file_RF_voxels, link=Link.OUTPUT, transfer=True, register=False)
        dax.addJob(job4)
        dax.depends(job4, job3)

        gm_mif = File(DWIFiles.GM_MIF.value)
        csf_mif = File(DWIFiles.CSF_MIF.value)
        file_wm_fod = File(TractsGenFiles.WM_FOD_MIF.value)
        # TODO: does msdwi2fod exist? should we use dwi2fod with the same args?
        # (Current MRtrix3 exposes multi-shell multi-tissue CSD as `dwi2fod msmt_csd`,
        # which matches the argument order used below.)
        job5 = Job(TractsGenJobNames.MSDWI2FOD.value)
        job5.addArguments("msmt_csd", dwi_mif, file_RF_WM, file_wm_fod, file_RF_GM, gm_mif,
                          file_RF_CSF, csf_mif, "-mask", mask_mif, "-nthreads", self.mrtrix_threads)
        job5.uses(dwi_mif, link=Link.INPUT)
        job5.uses(file_RF_WM, link=Link.INPUT)
        job5.uses(file_RF_GM, link=Link.INPUT)
        job5.uses(file_RF_CSF, link=Link.INPUT)
        job5.uses(mask_mif, link=Link.INPUT)
        job5.uses(file_wm_fod, link=Link.OUTPUT, transfer=True, register=False)
        job5.uses(gm_mif, link=Link.OUTPUT, transfer=True, register=False)
        job5.uses(csf_mif, link=Link.OUTPUT, transfer=True, register=False)
        dax.addJob(job5)
        dax.depends(job5, job4)
        last_job = job5
    else:
        file_response = File(TractsGenFiles.RESPONSE_TXT.value)
        job4 = Job(TractsGenJobNames.DWI2RESPONSE.value, node_label="Compute the DWI Response")
        job4.addArguments(dwi_mif, file_response, mask_mif)
        job4.uses(dwi_mif, link=Link.INPUT)
        job4.uses(mask_mif, link=Link.INPUT)
        job4.uses(file_response, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job4)
        dax.depends(job4, job_mask)

        file_wm_fod = File(TractsGenFiles.WM_FOD_MIF.value)
        job5 = Job(TractsGenJobNames.DWI2FOD.value, node_label="Obtain WM FOD")
        job5.addArguments("csd", dwi_mif, file_response, file_wm_fod, "-mask", mask_mif,
                          "-nthreads", self.mrtrix_threads)
        job5.uses(dwi_mif, link=Link.INPUT)
        job5.uses(file_response, link=Link.INPUT)
        job5.uses(mask_mif, link=Link.INPUT)
        job5.uses(file_wm_fod, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job5)
        dax.depends(job5, job4)
        last_job = job5

    file_strmlns = File(TractsGenFiles.FILE_TCK.value % self.strmlns_no)
    job6 = Job(TractsGenJobNames.TCKGEN.value, node_label="Generate tracts")
    if self.os == "LINUX":
        job6.addArguments(file_wm_fod, file_strmlns, "-select", self.strmlns_no,
                          "-seed_gmwmi", file_gmwmi, "-act", file_5tt, "-seed_unidirectional",
                          "-maxlength", self.strmlns_size, "-step", self.strmlns_step,
                          "-nthreads", self.mrtrix_threads)
    else:
        job6.addArguments(file_wm_fod, file_strmlns, "-number", self.strmlns_no,
                          "-seed_gmwmi", file_gmwmi, "-act", file_5tt, "-unidirectional",
                          "-maxlength", self.strmlns_size, "-step", self.strmlns_step,
                          "-nthreads", self.mrtrix_threads)
    job6.uses(file_wm_fod, link=Link.INPUT)
    job6.uses(file_gmwmi, link=Link.INPUT)
    job6.uses(file_5tt, link=Link.INPUT)
    job6.uses(file_strmlns, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job6)
    dax.depends(job6, last_job)
    dax.depends(job6, job1)

    file_strmlns_sift = File(TractsGenFiles.FILE_SIFT_TCK.value % self.strmlns_sift_no)
    job7 = Job(TractsGenJobNames.TCKSIFT.value, node_label="Tracts SIFT")
    job7.addArguments(file_strmlns, file_wm_fod, file_strmlns_sift, "-term_number",
                      self.strmlns_sift_no, "-act", file_5tt, "-nthreads", self.mrtrix_threads)
    job7.uses(file_strmlns, link=Link.INPUT)
    job7.uses(file_wm_fod, link=Link.INPUT)
    job7.uses(file_5tt, link=Link.INPUT)
    job7.uses(file_strmlns_sift, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job7)
    dax.depends(job7, job6)
    dax.depends(job7, job1)

    b0_nii_gz = File(DWIFiles.B0_NII_GZ.value)
    file_tdi_ends = File(TractsGenFiles.TDI_ENDS_MIF.value)
    job8 = Job(TractsGenJobNames.TCKMAP.value, node_label="TCKMAP")
    job8.addArguments(file_strmlns_sift, file_tdi_ends, "-vox", "1", "-template", b0_nii_gz)
    job8.uses(file_strmlns_sift, link=Link.INPUT)
    job8.uses(b0_nii_gz, link=Link.INPUT)
    job8.uses(file_tdi_ends, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job8)
    dax.depends(job8, job7)

    file_tdi_ends_nii_gz = File(TractsGenFiles.TDI_ENDS_NII_GZ.value)
    job_convert_tdi_ends = Job(DWIJobNames.MRCONVERT.value)
    job_convert_tdi_ends.addArguments(file_tdi_ends, file_tdi_ends_nii_gz)
    job_convert_tdi_ends.uses(file_tdi_ends, link=Link.INPUT)
    job_convert_tdi_ends.uses(file_tdi_ends_nii_gz, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job_convert_tdi_ends)
    dax.depends(job_convert_tdi_ends, job8)

    self.qc_snapshots.add_2vols_snapshot_step(dax, [job_convert_tdi_ends], t1_in_d, file_tdi_ends_nii_gz)

    fs_custom = File(AsegFiles.FS_CUSTOM_TXT.value % self.atlas_suffix)
    aparc_aseg_in_d = File(CoregFiles.APARC_ASEG_IN_D.value % self.atlas_suffix)
    file_vol_lbl = File(TractsGenFiles.VOLUME_LBL_NII_GZ.value % self.atlas_suffix)
    fs_color_lut = File(Inputs.FS_LUT.value)
    job9 = Job(TractsGenJobNames.LABEL_CONVERT.value,
               node_label="Compute APARC+ASEG labeled volume for tracts")
    job9.addArguments(aparc_aseg_in_d, fs_color_lut, fs_custom, file_vol_lbl)
    job9.uses(aparc_aseg_in_d, link=Link.INPUT)
    job9.uses(fs_color_lut, link=Link.INPUT)
    job9.uses(fs_custom, link=Link.INPUT)
    job9.uses(file_vol_lbl, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job9)
    dax.depends(job9, job_fs_custom)
    dax.depends(job9, job_aparc_aseg_in_d)

    self.qc_snapshots.add_2vols_snapshot_step(dax, [job9], t1_in_d, file_vol_lbl)

    file_aparc_aseg_counts5M_csv = File(TractsGenFiles.TRACT_COUNTS.value % self.atlas_suffix)
    job10 = Job(TractsGenJobNames.TCK2CONNECTOME.value, node_label="Generate weights")
    job10.addArguments(file_strmlns_sift, file_vol_lbl, "-assignment_radial_search", "2",
                       file_aparc_aseg_counts5M_csv)
    job10.uses(file_strmlns_sift, link=Link.INPUT)
    job10.uses(file_vol_lbl, link=Link.INPUT)
    job10.uses(file_aparc_aseg_counts5M_csv, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job10)
    dax.depends(job10, job7)
    dax.depends(job10, job9)

    file_aparc_aseg_mean_tract_lengths5M_csv = File(TractsGenFiles.TRACT_LENGHTS.value % self.atlas_suffix)
    job11 = Job(TractsGenJobNames.TCK2CONNECTOME.value, node_label="Generate tract lengths")
    job11.addArguments(file_strmlns_sift, file_vol_lbl, "-assignment_radial_search", "2",
                       "-scale_length", "-stat_edge", "mean",
                       file_aparc_aseg_mean_tract_lengths5M_csv)
    job11.uses(file_strmlns_sift, link=Link.INPUT)
    job11.uses(file_vol_lbl, link=Link.INPUT)
    job11.uses(file_aparc_aseg_mean_tract_lengths5M_csv, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job11)
    dax.depends(job11, job7)
    dax.depends(job11, job9)

    return job10, job11
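# A minimal driver sketch for the method above. `pipeline` and the four upstream jobs
# are placeholders for objects created by earlier stages of this workflow, not the
# pipeline's actual API; ADAG and writeXML are the standard Pegasus DAX3 calls.
from Pegasus.DAX3 import ADAG

dax = ADAG("tracts_generation")
job_weights, job_lengths = pipeline.add_tracts_generation_steps(
    dax, job_t1_in_d, job_mask, job_aparc_aseg_in_d, job_fs_custom)
with open("tracts_generation.dax", "w") as fp:
    dax.writeXML(fp)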
def add_sensor_model_steps(self, dax, job_source_model):
    # TODO: the seeg positions file should contain only positions, not labels,
    # in order to work with OpenMEEG
    seeg_xyz = File(SEEGCompFiles.SEEG_XYZ.value)

    head_model_geom = File(HeadModelFiles.HEAD_MODEL_GEOM.value)
    head_model_cond = File(HeadModelFiles.HEAD_MODEL_COND.value)
    bem_tri_surfs = [File(HeadModelFiles.INNER_SKULL_SURFACE_LOW_TRI.value % self.subject),
                     File(HeadModelFiles.OUTER_SKULL_SURFACE_LOW_TRI.value % self.subject),
                     File(HeadModelFiles.OUTER_SKIN_SURFACE_LOW_TRI.value % self.subject),
                     File(HeadModelFiles.BRAIN_SURFACE_LOW_TRI.value % self.subject)]

    head2ipm_file = File(SensorModelFiles.SEEG_H2IPM.value)
    job1 = Job(HeadModelJobNames.OM_ASSEMBLE.value)
    job1.addArguments("-h2ipm", head_model_geom, head_model_cond, seeg_xyz, head2ipm_file)
    for surf in bem_tri_surfs:
        job1.uses(surf, link=Link.INPUT)
    job1.uses(head_model_geom, link=Link.INPUT)
    job1.uses(head_model_cond, link=Link.INPUT)
    job1.uses(seeg_xyz, link=Link.INPUT)
    job1.uses(head2ipm_file, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job1)
    dax.depends(job1, job_source_model)

    lh_white_dsm = File(SourceModelFiles.LH_WHITE_RESAMP_DSM.value % (self.trg_subject, self.atlas_suffix))
    lh_ds2ipm_file = File(SensorModelFiles.LH_DS2IPM.value % (self.trg_subject, self.atlas_suffix))
    job2 = Job(HeadModelJobNames.OM_ASSEMBLE.value)
    job2.addArguments("-ds2ipm", head_model_geom, head_model_cond, lh_white_dsm, seeg_xyz, lh_ds2ipm_file)
    for surf in bem_tri_surfs:
        job2.uses(surf, link=Link.INPUT)
    job2.uses(head_model_geom, link=Link.INPUT)
    job2.uses(head_model_cond, link=Link.INPUT)
    job2.uses(lh_white_dsm, link=Link.INPUT)
    job2.uses(seeg_xyz, link=Link.INPUT)
    job2.uses(lh_ds2ipm_file, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job2)
    dax.depends(job2, job1)

    rh_white_dsm = File(SourceModelFiles.RH_WHITE_RESAMP_DSM.value % (self.trg_subject, self.atlas_suffix))
    rh_ds2ipm_file = File(SensorModelFiles.RH_DS2IPM.value % (self.trg_subject, self.atlas_suffix))
    job3 = Job(HeadModelJobNames.OM_ASSEMBLE.value)
    job3.addArguments("-ds2ipm", head_model_geom, head_model_cond, rh_white_dsm, seeg_xyz, rh_ds2ipm_file)
    for surf in bem_tri_surfs:
        job3.uses(surf, link=Link.INPUT)
    job3.uses(head_model_geom, link=Link.INPUT)
    job3.uses(head_model_cond, link=Link.INPUT)
    job3.uses(rh_white_dsm, link=Link.INPUT)
    job3.uses(seeg_xyz, link=Link.INPUT)
    job3.uses(rh_ds2ipm_file, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job3)
    dax.depends(job3, job1)

    return job2, job3
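# The three om_assemble jobs above differ only in their extra input file and output file;
# a helper like this hypothetical one could express the shared wiring once. It assumes the
# same Job/File/Link/HeadModelJobNames definitions used in add_sensor_model_steps.
def _add_om_assemble_job(self, dax, mode, extra_inputs, output_file, parent_job,
                         bem_tri_surfs, head_model_geom, head_model_cond, seeg_xyz):
    job = Job(HeadModelJobNames.OM_ASSEMBLE.value)
    # e.g. mode="-h2ipm" with extra_inputs=(), or mode="-ds2ipm" with extra_inputs=(dsm,)
    job.addArguments(mode, head_model_geom, head_model_cond, *extra_inputs)
    job.addArguments(seeg_xyz, output_file)
    for surf in bem_tri_surfs:
        job.uses(surf, link=Link.INPUT)
    for input_file in (head_model_geom, head_model_cond) + tuple(extra_inputs) + (seeg_xyz,):
        job.uses(input_file, link=Link.INPUT)
    job.uses(output_file, link=Link.OUTPUT, transfer=True, register=True)
    dax.addJob(job)
    dax.depends(job, parent_job)
    return job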
# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(PFN(config.get('all', 'file_url') + input_file + "/f.a",
             config.get('all', 'file_site')))
cluster.addFile(a)

for i in range(1, 3):
    sleep = Executable(namespace="cluster", name="level" + str(i), version="1.0",
                       os="linux", arch="x86",
                       installed=config.getboolean('all', 'executable_installed'))
    sleep.addPFN(PFN(config.get('all', 'executable_url') + sys.argv[1] + "/bin/pegasus-keg",
                     config.get('all', 'executable_site')))
    sleep.addProfile(Profile(namespace="pegasus", key="clusters.size",
                             value=config.get('all', 'clusters_size')))
    sleep.addProfile(Profile(namespace="pegasus", key="clusters.maxruntime",
                             value=config.get('all', 'clusters_maxruntime')))
    cluster.addExecutable(sleep)

for i in range(4):
    job = Job(namespace="cluster", name="level1", version="1.0")
    job.addArguments('-a level1 -T ' + str(i + 1))
    job.addArguments('-i', a)
    job.addProfile(Profile(namespace="pegasus", key="job.runtime", value=str(i + 1)))
    job.uses(a, link=Link.INPUT)
    cluster.addJob(job)

    for j in range(4):
        child = Job(namespace="cluster", name="level2", version="1.0")
        child.addArguments('-a level2 -T ' + str((j + 1) * 2))
        child.addProfile(Profile(namespace="pegasus", key="job.runtime", value=str((j + 1) * 2)))
        cluster.addJob(child)
        cluster.depends(parent=job, child=child)

# Write the DAX to standard out
cluster.writeXML(sys.stdout)
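# The clustering example above reads all of its settings from an INI-style config file;
# a minimal file covering the keys it uses might look like this (values are placeholders,
# not taken from the original example):
#
#   [all]
#   file_url = file:///scratch/inputs
#   file_site = local
#   executable_url = file:///usr
#   executable_site = condorpool
#   executable_installed = False
#   clusters_size = 3
#   clusters_maxruntime = 7
#
# It would be loaded with the stdlib parser the config.get/getboolean calls assume:
try:
    from configparser import ConfigParser                       # Python 3
except ImportError:
    from ConfigParser import SafeConfigParser as ConfigParser  # Python 2, DAX3 era

config = ConfigParser()
config.read("cluster.config")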
def _clipr(self, clip_to, reads, tag):
    anchor = self._compute_clip_seed(self._read_length)

    clip_reads = Job(name='clipR')
    clip_reads.invoke('all', self._state_update % 'Generate new splice candidates')

    seed = 'F%s' % self._clip_seed
    mismatches = self._clip_mismatches

    # Input files
    prefix = self._get_index_hash(self._read_length)
    fa = File('h%s/%s.fa' % (prefix, clip_to.upper()))
    reads_txt = File('%s_%s_reads.txt' % (tag, clip_to.lower()))

    for i in self._range():
        # Input files
        reads_i = File(reads % i)

        # Output files
        file_type = 'sam'
        path, file_name, ext = GTFAR._get_filename_parts(reads_i.name)
        sam_mapping = File('%s_A_%d_%d_%d_%s.%s' % (clip_to.upper(), self._clip_seed, mismatches,
                                                    anchor, file_name, file_type))
        fastq_out = File('%s_miss_%s%s' % (file_name, clip_to, ext))

        # Uses
        clip_reads.uses(reads_i, link=Link.INPUT)
        clip_reads.uses(fastq_out, link=Link.OUTPUT, transfer=False, register=False)
        clip_reads.uses(sam_mapping, link=Link.OUTPUT, transfer=False, register=False)

    # Output files
    log = File('%s_%s.log' % (tag, clip_to.lower()))

    # Arguments
    clip_reads.addArguments(fa, reads_txt, '--seed %s' % seed, '--anchorL %d' % anchor, '-e',
                            '-v %d' % mismatches)
    clip_reads.addArguments('-s', '-u', '--noSamHeader', '--ignoreDummyR %d' % 40,
                            '--ignoreRepeatR %d' % 15)
    clip_reads.setStdout(log)

    # Uses
    clip_reads.uses(fa, link=Link.INPUT)
    clip_reads.uses(reads_txt, link=Link.INPUT)
    clip_reads.uses(log, link=Link.OUTPUT, transfer=False, register=False)

    self.adag.addJob(clip_reads)
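# `_clipr` calls GTFAR._get_filename_parts, which is defined elsewhere in this module.
# A best-guess sketch of its contract, inferred only from the call sites above: it splits
# a file name into (directory, stem, extension), with the extension keeping its leading
# dot so that '%s_miss_%s%s' % (stem, clip_to, ext) reproduces a valid fastq name.
import os

def _get_filename_parts(filename):
    path, base = os.path.split(filename)
    stem, ext = os.path.splitext(base)
    return path, stem, ext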
def steps_dwi_preproc(main_dax, dwi_config):
    """
    :param main_dax: main DAX to which the DWI pre-processing steps are appended
    :param dwi_config: patient-specific DWI configuration
    """
    LOGGER.info("DWI pre-processing %s" % (dwi_config.folder_path,))

    if dwi_config.is_dwi_reversed:
        LOGGER.info("Reversed DWI")
        if dwi_config.is_dicom:
            LOGGER.info("DICOM identified for DWI")
            job0 = Job(name="mrchoose",
                       node_label=dwi_config.prefix.upper() + " input pre-processing 0 (Reversed, DICOM)")
            job0.addArguments("0", "mrconvert", dwi_config.folder, dwi_config.dwi_raw_mif_file)
            job0.uses(dwi_config.folder, link=Link.INPUT)
            job0.uses(dwi_config.dwi_raw_mif_file, link=Link.OUTPUT, transfer=True, register=False)
            main_dax.addJob(job0)

            job1 = Job(name="mrchoose",
                       node_label=dwi_config.prefix.upper() + " input pre-processing 1 (Reversed, DICOM)")
            job1.addArguments("1", "mrconvert", dwi_config.folder, dwi_config.dwi_raw_re_file)
            job1.uses(dwi_config.folder, link=Link.INPUT)
            job1.uses(dwi_config.dwi_raw_re_file, link=Link.OUTPUT, transfer=True, register=False)
            main_dax.addJob(job1)
        else:
            LOGGER.info("Not DICOM %s" % dwi_config.file_format)
            job0 = Job(name="mrconvert",
                       node_label=dwi_config.prefix.upper() + " input convert (Reversed, not-dicom)")
            job0.addArguments(dwi_config.raw_nii_file, dwi_config.dwi_raw_mif_file)
            job0.uses(dwi_config.raw_nii_file, link=Link.INPUT)
            job0.uses(dwi_config.dwi_raw_mif_file, link=Link.OUTPUT, transfer=True, register=False)
            main_dax.addJob(job0)

            job1 = Job(name="mrconvert",
                       node_label=dwi_config.prefix.upper() + " input convert RE (Reversed, not-dicom)")
            job1.addArguments(dwi_config.dwi_nii_re_file, dwi_config.dwi_raw_re_file)
            job1.uses(dwi_config.dwi_nii_re_file, link=Link.INPUT)
            job1.uses(dwi_config.dwi_raw_re_file, link=Link.OUTPUT, transfer=True, register=False)
            main_dax.addJob(job1)
            main_dax.depends(job1, job0)

        job2 = Job(name="dwipreproc", node_label="Preprocess with eddy correct (Reversed)")
        job2.addArguments(dwi_config.scan_direction, dwi_config.dwi_raw_mif_file, dwi_config.dwi_mif_file,
                          "-rpe_pair", dwi_config.dwi_raw_mif_file, dwi_config.dwi_raw_re_file,
                          "-nthreads", dwi_config.number_of_threads_mrtrix)
        job2.uses(dwi_config.dwi_raw_mif_file, link=Link.INPUT)
        # The reverse-encoded series is consumed by dwipreproc, not produced by it.
        job2.uses(dwi_config.dwi_raw_re_file, link=Link.INPUT)
        job2.uses(dwi_config.dwi_mif_file, link=Link.OUTPUT, transfer=True, register=False)
        main_dax.addJob(job2)
        # In the DICOM case job0 and job1 are independent, so job2 must wait for both;
        # job1 is handled by the shared dependency added at the end of this function.
        main_dax.depends(job2, job0)
    else:
        LOGGER.info("Simple DWI (non-reversed)")
        job1 = Job(name="mrconvert", node_label="Convert dicoms or nifti to .mif (Non-Reversed)")
        job1.addArguments(dwi_config.folder, dwi_config.dwi_raw_mif_file)
        job1.uses(dwi_config.folder, link=Link.INPUT)
        job1.uses(dwi_config.dwi_raw_mif_file, link=Link.OUTPUT, transfer=True, register=False)
        main_dax.addJob(job1)

        job2 = Job(name="dwipreproc", node_label="Preprocess with eddy correct (Non-Reversed)")
        job2.addArguments(dwi_config.scan_direction, dwi_config.dwi_raw_mif_file, dwi_config.dwi_mif_file,
                          "-rpe_none", "-nthreads", dwi_config.number_of_threads_mrtrix)
        job2.uses(dwi_config.dwi_raw_mif_file, link=Link.INPUT)
        job2.uses(dwi_config.dwi_mif_file, link=Link.OUTPUT, transfer=True, register=False)
        main_dax.addJob(job2)

    job3 = Job(name="dwi2mask", node_label="Create Brain Mask")
    job3.addArguments(dwi_config.dwi_mif_file, dwi_config.brain_mask,
                      "-nthreads", dwi_config.number_of_threads_mrtrix)
    job3.uses(dwi_config.dwi_mif_file, link=Link.INPUT)
    job3.uses(dwi_config.brain_mask, link=Link.OUTPUT, transfer=True, register=False)
    main_dax.addJob(job3)

    job4 = Job(name="dwiextract", node_label="Extract BZERO")
    job4.addArguments(dwi_config.dwi_mif_file, dwi_config.b0, "-bzero",
                      "-nthreads", dwi_config.number_of_threads_mrtrix)
    job4.uses(dwi_config.dwi_mif_file, link=Link.INPUT)
    job4.uses(dwi_config.b0, link=Link.OUTPUT, transfer=True, register=False)
    main_dax.addJob(job4)

    # Add control-flow dependencies
    main_dax.depends(job2, job1)
    main_dax.depends(job3, job2)
    main_dax.depends(job4, job3)

    LOGGER.debug("DWI pre-processing steps added %s" % job4)
    return job4
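# Usage sketch: the returned b0-extraction job anchors downstream stages such as the
# tracts generation above. `dwi_config` stands in for whatever object supplies the
# attributes read in steps_dwi_preproc; the name is illustrative, not the real API.
import sys
from Pegasus.DAX3 import ADAG

main_dax = ADAG("dwi_preproc")
job_b0 = steps_dwi_preproc(main_dax, dwi_config)
# Later stages can now declare dependencies on job_b0, e.g.:
#   main_dax.depends(job_tracts, job_b0)
main_dax.writeXML(sys.stdout)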