def autoGeneratePrereqs(self):
    """Create the two Input steps this step depends on and chain them.

    The anchor Input wraps ``self.ref_file`` and has the merge step as its
    prerequisite; the query Input wraps the merge step's output file and has
    the anchor as its prerequisite.  The query's step directory is this
    step's directory name prefixed with ``input_for_``.
    """
    base_dir = self.workspace.work_dir

    # Anchor side: the reference file, staged after the merge step.
    reference_input = Input(Workspace(base_dir, self.ref_file))
    self.anchor = reference_input
    reference_input.prereq = self.merge

    # Query side: the merge output, staged after the anchor.
    merged_input = Input(Workspace(base_dir, self.merge.getOutputFile()))
    self.query = merged_input
    merged_input.prereq = reference_input
    merged_input.step_dir = "input_for_" + self.getStepDir()
class ReferenceAlignment(Step):
    """Step that aligns a merge step's output against a reference file.

    ``writeCode`` builds a shell script that invokes the workspace's
    ``bng_ref_aligner`` binary with the anchor (reference) and query (merge
    output) inputs; the result is an ``.xmap`` file inside the step
    directory.  ``createQualityObject`` summarises that file.
    """

    def __init__(self, workspace, merge, ref_file):
        """Store the inputs, set the aligner parameters and build prereqs.

        Args:
            workspace: workspace object; ``work_dir``, ``input_file``,
                ``binaries`` and ``resources`` are read from it.
            merge: the merge step whose output file is the alignment query.
            ref_file: path of the reference file used as the anchor.
        """
        self.workspace = workspace
        self.merge = merge
        self.ref_file = ref_file
        self.quality = None

        # Output prefix is the reference file's base name up to its first ".".
        base_name = self.ref_file.split('/')[-1]
        self.output_prefix = base_name.split('.')[0]

        self.send_output_to_file = True       # adds -stdout
        self.send_error_to_file = True        # adds -stderr
        self.output_veto_regex = "_intervals.txt$"  # -output-veto-filter

        self.res = 2.9                        # -res
        self.pval = 1e-10                     # -T
        # NOTE(review): fp, fn and sf are not assigned in this constructor
        # (their former defaults 0.6, 0.06 and 0.20 are disabled), yet
        # writeCode reads self.fp, self.fn and self.sf.  Presumably they are
        # supplied by the Step base class or by the caller before writeCode
        # runs -- confirm.
        self.sd = 0.10                        # -sd (0.20 was a disabled alternative)
        # An "sr" value of 0.03 was also disabled here; no flag uses it.

        self.allow_overhang = True            # -extend 1/0
        self.outlier_pval = 0.0001            # -outlier
        self.end_outlier_pval = 0.001         # -endoutlier
        self.max_query_alignment_interval = 12      # -deltaX
        self.max_reference_alignment_interval = 12  # -deltaY
        self.min_sites_for_chimera = 14       # -xmapchim

        # The nine values below are joined, in this order, into -hashgen.
        self.hash_window = 5
        self.hash_min_sites = 3
        self.hash_sd_max = 2.2
        self.hash_sd_rms = 1.2
        self.hash_relative_error = 0.05
        self.hash_offset_kb = 3.0
        self.hash_max_insert_errors = 1
        self.hash_max_probe_errors = 1
        self.hash_max_unresolved_sites = 1
        self.hash_delta = 50                  # -hashdelta

        self.target_resolution = 1e-3         # -mres
        self.resolution_reduction = 1.2       # -rres
        self.allow_no_splits = True           # -nosplit 2 when True
        self.allow_infinite_splits = False    # -nosplit 0 when True, else 1
        self.scale_bias_wt = 0                # -biaswt
        self.overwrite_output = True          # adds -force
        self.print_indel_file = True          # adds -indel

        self.autoGeneratePrereqs()

    def __hash__(self):
        return hash((self.workspace.input_file,
                     self.workspace.work_dir,
                     self.ref_file))

    def __str__(self):
        return ("Comparison of " + self.workspace.input_file
                + " to " + self.ref_file)

    @staticmethod
    def generateFromStepDir(step_dir, blocks=None):
        """Rebuild a ReferenceAlignment from a step directory name.

        The name is expected to have the layout produced by ``getStepDir``:
        ``comparison_<input>_<fp>_<fn>_<pval>_<minlen>_<minsites>_<ref>``.
        The workspace is rooted at the current working directory.

        Args:
            step_dir: directory name to parse.
            blocks: stored on the reconstructed vital parameters.

        Raises:
            AttributeError: if ``step_dir`` does not match the layout
                (``re.match`` returns ``None``).
        """
        # Raw string: "\d" and "\." are regex escapes, not string escapes.
        match_result = re.match(
            r"comparison_([^_]+)_([\d\.]+)_([\d\.]+)_([\d\.e-]+)_([\d]+)_([\d]+)_([^/]+)",
            step_dir)
        work_dir = os.getcwd()
        input_file = match_result.group(1)
        workspace = Workspace(work_dir, input_file)

        fp = match_result.group(2)
        fn = match_result.group(3)
        pval = match_result.group(4)
        minlen = match_result.group(5)
        minsites = match_result.group(6)
        vital_parameters = VitalParameters(fp, fn, pval, minlen, minsites)
        vital_parameters.blocks = blocks

        merge = Merge(workspace, vital_parameters)
        ref_file = match_result.group(7)
        return ReferenceAlignment(workspace, merge, ref_file)

    def writeCode(self):
        """Return a one-element list holding the shell script for this step.

        The script changes into the work directory, creates and enters the
        step directory, prints it, and then runs the aligner binary with
        every configured flag.
        """
        step_dir = self.getStepDir()
        code = "cd " + self.workspace.work_dir + "\n"
        code += "mkdir " + step_dir + "\n"
        code += "cd " + step_dir + "\n"
        code += "pwd" + "\n"

        threads = self.getThreads()
        # Memory per thread, never below 1.
        maxmem = self.getMem() / threads
        if maxmem < 1:
            maxmem = 1

        if self.allow_no_splits:
            nosplit = "2"
        elif self.allow_infinite_splits:
            nosplit = "0"
        else:
            nosplit = "1"

        hashgen_fields = [
            self.hash_window,
            self.hash_min_sites,
            self.hash_sd_max,
            self.hash_sd_rms,
            self.hash_relative_error,
            self.hash_offset_kb,
            self.hash_max_insert_errors,
            self.hash_max_probe_errors,
            self.hash_max_unresolved_sites,
        ]

        # Insertion order is the order the flags appear on the command line.
        param_values = OrderedDict()
        # Inputs are referenced relative to the step directory entered above.
        param_values["-ref"] = "../" + self.anchor.getOutputFile()
        param_values["-i"] = "../" + self.query.getOutputFile()
        param_values["-o"] = self.output_prefix
        param_values["-maxthreads"] = str(threads)
        param_values["-insertThreads"] = str(threads)
        param_values["-maxmem"] = str(maxmem)
        param_values["-output-veto-filter"] = self.output_veto_regex
        param_values["-res"] = str(self.res)
        param_values["-T"] = str(self.pval)
        param_values["-FP"] = str(self.fp)
        param_values["-FN"] = str(self.fn)
        param_values["-sf"] = str(self.sf)
        param_values["-sd"] = str(self.sd)
        param_values["-extend"] = "1" if self.allow_overhang else "0"
        param_values["-outlier"] = str(self.outlier_pval)
        param_values["-endoutlier"] = str(self.end_outlier_pval)
        param_values["-deltaX"] = str(self.max_query_alignment_interval)
        param_values["-deltaY"] = str(self.max_reference_alignment_interval)
        param_values["-xmapchim"] = str(self.min_sites_for_chimera)
        param_values["-hashgen"] = " ".join(str(v) for v in hashgen_fields)
        param_values["-hash"] = ""
        param_values["-hashdelta"] = str(self.hash_delta)
        param_values["-mres"] = str(self.target_resolution)
        param_values["-rres"] = str(self.resolution_reduction)
        param_values["-nosplit"] = nosplit
        param_values["-biaswt"] = str(self.scale_bias_wt)
        # Value-less switches are stored with an empty string.
        if self.send_output_to_file:
            param_values["-stdout"] = ""
        if self.send_error_to_file:
            param_values["-stderr"] = ""
        if self.overwrite_output:
            param_values["-force"] = ""
        if self.print_indel_file:
            param_values["-indel"] = ""

        param_list = [self.workspace.binaries["bng_ref_aligner"]]
        for key, value in param_values.items():
            param_list.extend((key, value))
        code += " ".join(param_list)

        return [code]

    def getStepDir(self):
        """Return the step directory name built from the vital parameters."""
        vital = self.merge.assembly.vital_parameters
        return "_".join([
            "comparison",
            self.workspace.input_file,
            str(vital.fp),
            str(vital.fn),
            str(vital.pval),
            str(vital.min_molecule_len),
            str(vital.min_molecule_sites),
            self.ref_file,
        ])

    def getOutputFile(self):
        """Return ``<step dir>/<output prefix>.<extension>``."""
        return (self.getStepDir() + "/" + self.output_prefix
                + "." + self.getOutputFileExtension())

    def getOutputFileExtension(self):
        return "xmap"

    def autoGeneratePrereqs(self):
        """Create the anchor and query Input steps and chain them.

        merge -> anchor (reference file) -> query (merge output).
        """
        work_dir = self.workspace.work_dir
        self.anchor = Input(Workspace(work_dir, self.ref_file))
        self.anchor.prereq = self.merge
        self.query = Input(Workspace(work_dir, self.merge.getOutputFile()))
        self.query.prereq = self.anchor
        self.query.step_dir = "input_for_" + self.getStepDir()

    def getPrereq(self):
        return self.query

    def loadQualityReportItems(self):
        """Return an OrderedDict mapping report lines to integer levels.

        Loads the quality object from file first if it is not in memory.
        Also stores ``unaligned_query_contig_num`` on the quality object.
        """
        if self.quality is None:
            self.loadQualityObjectFromFile()
        quality = self.quality

        num_query_contigs = self.query.loadQuality_count()
        unaligned = num_query_contigs - quality.aligned_query_contig_num
        quality.unaligned_query_contig_num = unaligned

        report_items = OrderedDict()
        report_items["Num alignments: " + str(quality.num_alignments)] = 1
        report_items["Num query contigs total: " + str(num_query_contigs)] = 3
        report_items["Num query contigs that don't align: " + str(unaligned)] = 2
        report_items["Total length: " + str(quality.total_length)] = 2
        report_items["Query length: " + str(quality.total_query_length)] = 3
        report_items["Proportion of query with match: " + str(quality.proportion_query)] = 1
        report_items["Average proportion of query contig within match: " + str(quality.average_proportion_of_query_matching)] = 2
        report_items["Anchor length: " + str(quality.total_anchor_length)] = 3
        report_items["Proportion of anchor with match: " + str(quality.proportion_anchor)] = 1
        report_items["Total confidence: " + str(quality.total_confidence)] = 2
        report_items["Max confidence: " + str(quality.max_confidence)] = 2
        report_items["Min confidence: " + str(quality.min_confidence)] = 2
        report_items["Average confidence: " + str(quality.average_confidence)] = 3
        report_items["Weighted average confidence: " + str(quality.weighted_average_confidence)] = 1

        return report_items

    def createQualityObject(self):
        """Summarise the xmap output into ``self.quality`` and save it.

        Accumulates per-alignment lengths and confidences from the parsed
        xmap file.  When the file holds no alignments, the averages and the
        minimum confidence are reported as 0.0 instead of dividing by zero.
        """
        proportion_of_query_matching = 0.0
        total_query_length = 0.0
        total_anchor_length = 0.0
        num_alignments = 0
        aligned_query_contigs = set()
        total_confidence = 0.0
        confidence_times_length = 0.0
        total_length = 0.0
        max_confidence = 0.0
        min_confidence = float("inf")

        for alignment in XmapFile(self.getOutputFile()).parse():
            num_alignments += 1
            aligned_query_contigs.add(alignment.query_id)

            conf = alignment.confidence
            total_confidence += conf
            if conf > max_confidence:
                max_confidence = conf
            if conf < min_confidence:
                min_confidence = conf

            query_length = abs(alignment.query_end - alignment.query_start)
            total_query_length += query_length
            proportion_of_query_matching += query_length / alignment.query_len

            anchor_length = abs(alignment.anchor_end - alignment.anchor_start)
            total_anchor_length += anchor_length

            # The confidence weight is the query span of the alignment.
            length = query_length
            total_length += length
            confidence_times_length += conf * length

        if total_length:
            weighted_average_confidence = confidence_times_length / total_length
        else:
            weighted_average_confidence = 0.0

        # Use the summed query span, mirroring proportion_anchor; the span of
        # only the last alignment would under-report the matched proportion.
        proportion_query = total_query_length / self.query.loadQuality_length()
        proportion_anchor = total_anchor_length / self.anchor.loadQuality_length()

        if num_alignments:
            average_confidence = total_confidence / num_alignments
            average_proportion = proportion_of_query_matching / num_alignments
        else:
            average_confidence = 0.0
            average_proportion = 0.0
            min_confidence = 0.0  # no alignment ever lowered the sentinel

        self.quality = Quality(
            num_alignments=num_alignments,
            aligned_query_contig_num=len(aligned_query_contigs),
            total_length=total_length,
            total_query_length=total_query_length,
            proportion_query=proportion_query,
            average_proportion_of_query_matching=average_proportion,
            total_anchor_length=total_anchor_length,
            proportion_anchor=proportion_anchor,
            total_confidence=total_confidence,
            min_confidence=min_confidence,
            max_confidence=max_confidence,
            average_confidence=average_confidence,
            weighted_average_confidence=weighted_average_confidence
        )
        self.saveQualityObjectToFile()

    def getQuality_weightedAverageConfidence(self):
        """Return the weighted average confidence, loading quality if needed."""
        if self.quality is None:
            self.loadQualityObjectFromFile()
        return self.quality.weighted_average_confidence

    def getQuality_totalLength(self):
        """Return the total aligned length, loading quality if needed."""
        if self.quality is None:
            self.loadQualityObjectFromFile()
        return self.quality.total_length

    def getMem(self):
        return self.workspace.resources.getSmallMemory()

    def getTime(self):
        return self.workspace.resources.getSmallTime()

    def getThreads(self):
        return self.workspace.resources.getSmallThreads()