class tXmapFile(tFile_base):
    """Unit tests for ``XmapFile``: extension lookup, parsing and writing.

    ``self.input_file`` is not defined in this class; it is presumably
    provided by ``tFile_base`` (confirm against the base class).
    """

    def setUp(self):
        # Create (or truncate) the input file so it exists on disk, and build
        # the XmapFile under test while the handle is still open.
        with open(self.input_file, "w"):
            self.obj = XmapFile(self.input_file)

    def test_getExtension(self):
        # getExtension is called on the class itself, not on an instance.
        self.assertEqual("xmap", XmapFile.getExtension())

    def test_parse(self):
        # parse() must compare equal to an XmapFile_iter over the same file.
        expected = XmapFile_iter(self.input_file)
        self.assertEqual(expected, self.obj.parse())

    def test_write(self):
        # One alignment record must serialise to one tab-free, space-separated
        # line exactly as spelled out below.
        expected = "1 2 3 1.0 2.0 1.0 2.0 + 999.9 1DM 1.0 1.0 1 (1,1)(1,2)\n"
        alignment = Mock(
            alignment_id=1, query_id=2, anchor_id=3,
            query_start=1.0, query_end=2.0,
            anchor_start=1.0, anchor_end=2.0,
            orientation="+", confidence=999.9, hit_enum="1DM",
            query_len=1.0, anchor_len=1.0,
            label_channel="1", alignment="(1,1)(1,2)")
        try:
            with open("test_file", "w") as o_file:
                self.obj.write(alignment, o_file)
            with open("test_file") as i_file:
                actual = i_file.readline()
        finally:
            # Always clean up the scratch file, even when writing or reading
            # raised, so a failure here cannot pollute later tests.
            if os.path.exists("test_file"):
                os.remove("test_file")
        self.assertEqual(expected, actual)
def __init__ (self, xmap_file_name):
    """Locate the .xmap file and its two companion .cmap files.

    xmap_file_name -- path to the .xmap file, using '/' as the separator.
        The anchor and query maps are looked up next to it by replacing
        ".xmap" in the file name with "_r.cmap" and "_q.cmap".
    """
    directory, separator, file_name = xmap_file_name.rpartition('/')
    if not separator:
        # Bare file name: work in the current directory.
        self.workspace = "."
    else:
        # A file directly under the root ("/file.xmap") has an empty head;
        # fall back to "/" instead of handing CD an empty path.
        self.workspace = directory or "/"
    with CD(self.workspace):
        self.xmap = XmapFile(file_name)
        self.anchor_cmap = CmapFile(file_name.replace(".xmap", "_r.cmap"))
        self.query_cmap = CmapFile(file_name.replace(".xmap", "_q.cmap"))
    # Matches one "(<first>,<second>)" pair of integers in an alignment
    # string. Raw literal so the backslashes reach the regex engine verbatim.
    self.ALIGNED_LABELS = re.compile(r"\(([\d]+),([\d]+)\)")
def test_getExtension(self):
    # getExtension is called on the XmapFile class itself and must return
    # "xmap". assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("xmap", XmapFile.getExtension())
def setUp(self):
    # Opening in "w" mode creates (or truncates) the input file; the object
    # under test is built while that handle is still open, and the handle is
    # closed afterwards whether or not construction succeeded.
    handle = open(self.input_file, "w")
    try:
        self.obj = XmapFile(self.input_file)
    finally:
        handle.close()
def setUp(self):
    # The .xmap fixture is opened for writing before its writer is built.
    self.xmap = open(self.xmap_name, 'w')
    self.xmap_writer = XmapFile(self.xmap_name)
    # Open the query map, then the anchor map, each under the file name held
    # in the matching "<attribute>_name" attribute.
    for attribute in ('query_cmap', 'anchor_cmap'):
        setattr(self, attribute, open(getattr(self, attribute + '_name'), 'w'))
    # A single CmapFile built from the query map name is stored as the writer.
    self.cmap_writer = CmapFile(self.query_cmap_name)
class tFindBreakpoints(tScripts):
    """End-to-end tests for the ``find_breakpoints.py`` command-line script.

    Each test writes small .xmap/.cmap fixtures into the current directory,
    runs the script in a subprocess and compares the combined stdout/stderr
    text and the exit status against the expected values.
    """

    maxDiff=None
    # Path of the script under test, relative to the test working directory.
    _SCRIPT="../scripts/find_breakpoints.py"
    xmap_name='file.xmap'
    xmap_writer=None
    xmap=None
    # NOTE(review): the query map is named "_r" and the anchor map "_q" here,
    # which looks swapped relative to the usual "_r" = anchor / "_q" = query
    # naming. The expected messages below rely on these values, so they are
    # left unchanged -- confirm against the script.
    query_cmap_name='file_r.cmap'
    query_cmap=None
    anchor_cmap_name='file_q.cmap'
    anchor_cmap=None
    cmap_writer=None

    def setUp(self):
        self.xmap=open(self.xmap_name, 'w')
        self.xmap_writer=XmapFile(self.xmap_name)
        self.query_cmap=open(self.query_cmap_name, 'w')
        self.anchor_cmap=open(self.anchor_cmap_name, 'w')
        self.cmap_writer=CmapFile(self.query_cmap_name)

    def tearDown(self):
        # Closing an already-closed file is a no-op, so tests are free to
        # close fixtures themselves before running the script.
        for handle in (self.xmap, self.query_cmap, self.anchor_cmap):
            handle.close()
        # A test may delete fixtures on purpose; skip the ones that are gone
        # so one missing file does not stop the rest from being cleaned up.
        for name in (self.xmap_name, self.query_cmap_name, self.anchor_cmap_name):
            if os.path.exists(name):
                os.remove(name)

    ### Helpers ###
    def _runScript(self, *script_args):
        """Run the script and return ``[output, return_code]``.

        ``output`` is stdout with stderr merged in. A non-zero exit status is
        reported through the return code instead of an exception; any other
        exception propagates to the caller.
        """
        command=[self._SCRIPT]+list(script_args)
        try:
            output=subprocess.check_output(command, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as error:
            return [error.output, error.returncode]
        return [output, 0]

    def _writeLabels(self, stop, *cmap_files):
        """Write labels 1..stop-1 of contig 1 to each file, then close them.

        Every label sits at a position equal to its id.
        """
        for label_position in range(1, stop):
            label=Mock(contig_id=1, contig_len=1000, contig_site_count=1000,
                       label_id=label_position, channel="1",
                       position=label_position, stdev=0, coverage=1,
                       occurrences=1)
            for cmap_file in cmap_files:
                self.cmap_writer.write(label, cmap_file)
        for cmap_file in cmap_files:
            cmap_file.close()

    def _writeAlignment(self, query_end, anchor_end):
        """Write one '+' alignment of contig 1 to contig 1 and close the xmap."""
        alignment=Mock(alignment_id=1, query_id=1, anchor_id=1, query_start=2,
                       query_end=query_end, anchor_start=1,
                       anchor_end=anchor_end, orientation='+', confidence=250,
                       hit_enum="*", query_len=1000, anchor_len=1000,
                       label_channel="1", alignment="*")
        self.xmap_writer.write(alignment, self.xmap)
        self.xmap.close()

    ### Bad input ###
    def test_badInput_none(self):
        expected_stdout="usage: find_breakpoints.py [-h] xmap_file\nfind_breakpoints.py: error: too few arguments\n"
        expected_returnCode=2
        self.assertEqual([expected_stdout, expected_returnCode],
                         self._runScript())

    def test_badInput_notAFile(self):
        expected_stdout="The .xmap alignment file you specified, file.xmap.bed.ext, could not be found\n"
        expected_returnCode=1
        self.assertEqual([expected_stdout, expected_returnCode],
                         self._runScript(self.xmap_name+".bed.ext"))

    @unittest.skip('not_implemented')
    def test_badInput_notXmap(self):
        pass

    def test_badInput_contigInXmapNotInAnchorCmap(self):
        # Only the query map receives labels; the anchor map stays empty.
        self._writeLabels(1000, self.query_cmap)
        self._writeAlignment(query_end=998, anchor_end=1000)
        expected_stdout="The .xmap alignment ("+self.xmap_name+") file contains a contig id (1) that was not found in one of the .cmap contig files ("+self.anchor_cmap_name+")\n"
        expected_returnCode=1
        self.assertEqual([expected_stdout, expected_returnCode],
                         self._runScript(self.xmap_name))

    def test_badInput_contigInXmapNotInQueryCmap(self):
        # Only the anchor map receives labels; the query map stays empty.
        self._writeLabels(1000, self.anchor_cmap)
        self._writeAlignment(query_end=998, anchor_end=1000)
        expected_stdout="The .xmap alignment ("+self.xmap_name+") file contains a contig id (1) that was not found in one of the .cmap contig files ("+self.query_cmap_name+")\n"
        expected_returnCode=1
        self.assertEqual([expected_stdout, expected_returnCode],
                         self._runScript(self.xmap_name))

    def test_badInput_cantFindCmap(self):
        # Delete both maps; tearDown tolerates the missing files, so they do
        # not have to be recreated before the assertion.
        os.remove(self.query_cmap_name)
        os.remove(self.anchor_cmap_name)
        expected_stdout="Some of the .cmap contig files associated with your .xmap alignment file could not be found. More specifically, Unable to find query, anchor maps.\n"
        expected_returnCode=1
        self.assertEqual([expected_stdout, expected_returnCode],
                         self._runScript(self.xmap_name))

    ### One alignment only ###
    def test_oneAlignment_no5PercentOverhang(self):
        self._writeLabels(1000, self.query_cmap, self.anchor_cmap)
        self._writeAlignment(query_end=998, anchor_end=1000)
        expected_stdout=""
        expected_returnCode=0
        self.assertEqual([expected_stdout, expected_returnCode],
                         self._runScript(self.xmap_name))

    def test_oneAlignment_noLabeledOverhang(self):
        # Labels stop at 499, so nothing is labelled past the alignment end.
        self._writeLabels(500, self.query_cmap, self.anchor_cmap)
        self._writeAlignment(query_end=500, anchor_end=500)
        expected_stdout=""
        expected_returnCode=0
        self.assertEqual([expected_stdout, expected_returnCode],
                         self._runScript(self.xmap_name))

    def test_oneAlignment_oneSingleLabelOverhang(self):
        # Labels run to 501: exactly one label lies past the alignment end.
        self._writeLabels(502, self.query_cmap, self.anchor_cmap)
        self._writeAlignment(query_end=500, anchor_end=500)
        expected_stdout="Chr01\t500\t501\t1\t250.0\t+\n"
        expected_returnCode=0
        self.assertEqual([expected_stdout, expected_returnCode],
                         self._runScript(self.xmap_name))

    def test_oneAlignment_oneLabeledOverhang(self):
        # Labels run to 999: many labels lie past the alignment end.
        self._writeLabels(1000, self.query_cmap, self.anchor_cmap)
        self._writeAlignment(query_end=500, anchor_end=500)
        expected_stdout="Chr01\t500\t502\t1\t250.0\t+\n"
        expected_returnCode=0
        self.assertEqual([expected_stdout, expected_returnCode],
                         self._runScript(self.xmap_name))

    ### Multiple alignments ###
    @unittest.skip('not implmemented')
    def test_multipleAligns_noTwoLabelOverhang(self):
        pass
class AssessReferenceAlignment(object):
    """Classify the labels of an .xmap alignment against its anchor maps.

    The alignment strings in the .xmap file are scanned for
    "(<anchor label>,<query label>)" pairs. From those pairs the methods
    derive, per anchor contig, the positions of labels that were aligned,
    labels skipped on the anchor, and labels skipped on the query.
    """

    def __init__ (self, xmap_file_name):
        """Locate the .xmap file and its two companion .cmap files.

        xmap_file_name -- path to the .xmap file, using '/' as the separator.
            The anchor and query maps are looked up next to it by replacing
            ".xmap" in the file name with "_r.cmap" and "_q.cmap".
        """
        directory, separator, file_name = xmap_file_name.rpartition('/')
        if not separator:
            # Bare file name: work in the current directory.
            self.workspace = "."
        else:
            # A file directly under the root ("/file.xmap") has an empty
            # head; fall back to "/" instead of handing CD an empty path.
            self.workspace = directory or "/"
        with CD(self.workspace):
            self.xmap = XmapFile(file_name)
            self.anchor_cmap = CmapFile(file_name.replace(".xmap", "_r.cmap"))
            self.query_cmap = CmapFile(file_name.replace(".xmap", "_q.cmap"))
        # Matches one "(<anchor label>,<query label>)" pair. Raw literal so
        # the backslashes reach the regex engine verbatim.
        self.ALIGNED_LABELS = re.compile(r"\(([\d]+),([\d]+)\)")

    def _collectAnchorPositions(self, labels_by_contig, locations):
        """Append to ``locations`` the position of every selected anchor label.

        labels_by_contig -- {contig id: set of label ids} to look up.
        locations -- {contig id: list}; must already hold a list for every
            contig id present in ``labels_by_contig``.
        """
        for label in self.anchor_cmap.parse():
            if label.label_id in labels_by_contig.get(label.contig_id, ()):
                locations[label.contig_id].append(label.position)

    def extractTruePositives(self):
        """Return {anchor id: [positions]} of anchor labels that were aligned.

        Also stores the label ids in ``self.true_positive_labels`` and the
        result in ``self.true_positive_locations``.
        """
        self.true_positive_labels = {}
        self.true_positive_locations = {}
        for alignment in self.xmap.parse():
            anchor = alignment.anchor_id
            aligned = self.true_positive_labels.setdefault(anchor, set())
            self.true_positive_locations.setdefault(anchor, [])
            for label_pair in self.ALIGNED_LABELS.finditer(alignment.alignment):
                aligned.add(int(label_pair.group(1)))
        self._collectAnchorPositions(self.true_positive_labels,
                                     self.true_positive_locations)
        return self.true_positive_locations

    def extractFalseNegatives(self):
        """Return {anchor id: [positions]} of anchor labels that were skipped.

        False negative labels are present in the anchor, not in the query:
        every anchor label id strictly between two consecutive aligned
        anchor labels of the same alignment.
        """
        self.false_negative_labels = {}
        self.false_negative_locations = {}
        for alignment in self.xmap.parse():
            anchor = alignment.anchor_id
            skipped = self.false_negative_labels.setdefault(anchor, set())
            self.false_negative_locations.setdefault(anchor, [])
            previous_label = None
            for label_pair in self.ALIGNED_LABELS.finditer(alignment.alignment):
                anchor_label = int(label_pair.group(1))
                if previous_label is not None:
                    skipped.update(range(previous_label + 1, anchor_label))
                previous_label = anchor_label
        self._collectAnchorPositions(self.false_negative_labels,
                                     self.false_negative_locations)
        return self.false_negative_locations

    def extractFalsePositives(self):
        """Return {anchor id: [positions]} of query-only labels on the anchor.

        A false positive is a query label id strictly between two consecutive
        aligned query labels. Its anchor position is estimated as the
        position of the preceding aligned anchor label plus the distance, on
        the query map, from the last non-false-positive label read to the
        false positive.
        """
        self.false_positive_labels = {}
        for alignment in self.xmap.parse():
            anchor = alignment.anchor_id
            unaligned = self.false_positive_labels.setdefault(
                alignment.query_id, {})
            previous_label_pair = None
            for label_pair in self.ALIGNED_LABELS.finditer(alignment.alignment):
                if previous_label_pair is None:
                    previous_label_pair = label_pair
                    continue
                previous_query_label = int(previous_label_pair.group(2))
                query_label = int(label_pair.group(2))
                # Query label ids decrease along a '-' alignment, so the
                # open interval between the two labels is walked the other
                # way round.
                if alignment.orientation == "+":
                    start, stop = previous_query_label + 1, query_label
                else:
                    start, stop = query_label + 1, previous_query_label
                for i in range(start, stop):
                    unaligned[i] = {
                        "anchor_id": anchor,
                        "anchor_last_true_positive": int(previous_label_pair.group(1)),
                        "query_last_true_positive": int(previous_label_pair.group(2))}
                previous_label_pair = label_pair

        # {anchor id: {anchor label id: [offsets measured on the query map]}}
        false_positive_offsets = {}
        last_true_positive = None
        for label in self.query_cmap.parse():
            false_positive = self.false_positive_labels.get(
                label.contig_id, {}).get(label.label_id)
            if false_positive is None:
                last_true_positive = label
                continue
            offsets = false_positive_offsets.setdefault(
                false_positive["anchor_id"], {}).setdefault(
                false_positive["anchor_last_true_positive"], [])
            offsets.append(label.position - last_true_positive.position)

        self.false_positive_locations = {}
        for label in self.anchor_cmap.parse():
            offsets = false_positive_offsets.get(
                label.contig_id, {}).get(label.label_id)
            if offsets is None:
                continue
            self.false_positive_locations.setdefault(
                label.contig_id, []).extend(
                label.position + offset for offset in offsets)
        return self.false_positive_locations

    def extractPartialMatches(self, output_name='partial_matches.xmap'):
        """Return {anchor id: [(anchor_start, anchor_end)]} of partial matches.

        An alignment is partial when it covers less than 90% of the query
        length. Each partial alignment is also written to ``output_name``
        through the .xmap writer.
        """
        self.partial_match_locations = {}
        with open(output_name, 'w') as o_file:
            for align in self.xmap.parse():
                proportion = abs(align.query_start - align.query_end) / float(align.query_len)
                if proportion >= 0.9:
                    continue
                # Alignments expose the anchor as ``anchor_id``; the span is
                # stored as one tuple because list.append takes one value.
                self.partial_match_locations.setdefault(
                    align.anchor_id, []).append(
                    (align.anchor_start, align.anchor_end))
                self.xmap.write(align, o_file)
        return self.partial_match_locations

    def extractSequenceContexts(self, loci):
        """Not implemented yet."""
        pass

    def processSeqeuenceContexts(self, fasta_file, motif):
        """Print, per FASTA record, whether it has a gap and a motif SNV.

        A gap is a run of seven 'N' (or seven 'n'). A motif SNV is any string
        that differs from ``motif`` at exactly one position by another of
        A/T/C/G. One line "<gap>\\t<snv>" of 0/1 flags is printed per record
        after a header line.
        """
        snvs = set()
        for i, motif_base in enumerate(motif):
            for base in ['A', 'T', 'C', 'G']:
                if base != motif_base:
                    snvs.add(motif[:i] + base + motif[i + 1:])
        # NOTE(review): the header is space-separated while the rows are
        # tab-separated; kept as is because consumers may depend on it.
        print("HasGap HasSNV")
        for record in SeqIO.parse(fasta_file, 'fasta'):
            has_gap = "NNNNNNN" in record.seq or "nnnnnnn" in record.seq
            has_snv = any(snv in record.seq for snv in snvs)
            print(("1" if has_gap else "0") + "\t" + ("1" if has_snv else "0"))

    # Correctly spelled alias; the original (misspelled) name is kept so
    # existing callers continue to work.
    processSequenceContexts = processSeqeuenceContexts

    def findNearestNeighbors(self,loci,neighbor_locis):
        """Return {chromosome: [distance to the nearest neighbor per locus]}.

        loci -- {chromosome: iterable of positions}.
        neighbor_locis -- iterable of dicts shaped like ``loci``.
        A locus with no neighbor on its chromosome contributes no distance,
        so a chromosome's list may be shorter than its list of loci.
        """
        neighbors = {}
        for chrom in loci:
            # Gather every candidate once per chromosome instead of once per
            # locus.
            candidates = [neighbor_locus
                          for neighbor_loci in neighbor_locis
                          if chrom in neighbor_loci
                          for neighbor_locus in neighbor_loci[chrom]]
            neighbors[chrom] = []
            if not candidates:
                continue
            for locus in loci[chrom]:
                neighbors[chrom].append(
                    min(abs(locus - candidate) for candidate in candidates))
        return neighbors

    def findLabelsWithNearNeighbors(self,loci,neighbor_locis,threshold=301):
        """Count loci whose nearest neighbor is closer than ``threshold``.

        Arguments are as for findNearestNeighbors; ``threshold`` is an
        exclusive upper bound on the distance and defaults to 301.
        """
        nearest_neighbors = self.findNearestNeighbors(loci, neighbor_locis)
        return sum(1
                   for distances in nearest_neighbors.values()
                   for distance in distances
                   if distance < threshold)