Exemplo n.º 1
0
class tXmapFile(tFile_base):
	"""Unit tests for XmapFile: extension lookup, parsing and writing one alignment."""
	def setUp(self):
		"""Create (or truncate) the input file and build the XmapFile under test."""
		with open(self.input_file, "w"):
			self.obj=XmapFile(self.input_file)
	def test_getExtension(self):
		"""XmapFile.getExtension() is expected to return "xmap"."""
		# assertEqual instead of the deprecated assertEquals alias, matching the other tests here.
		self.assertEqual("xmap", XmapFile.getExtension())

	def test_parse(self):
		"""parse() is expected to compare equal to XmapFile_iter built on the same file."""
		expected=XmapFile_iter(self.input_file)
		self.assertEqual(expected, self.obj.parse())

	def test_write(self):
		"""write() is expected to emit the alignment as one tab-separated line."""
		expected="1	2	3	1.0	2.0	1.0	2.0	+	999.9	1DM	1.0	1.0	1	(1,1)(1,2)\n"
		alignment=Mock(alignment_id=1, query_id=2, anchor_id=3, query_start=1.0, query_end=2.0, anchor_start=1.0, anchor_end=2.0, orientation="+", confidence=999.9, hit_enum="1DM", query_len=1.0, anchor_len=1.0, label_channel="1", alignment="(1,1)(1,2)")
		output_name="test_file"
		try:
			with open(output_name, "w") as o_file:
				self.obj.write(alignment, o_file)

			with open(output_name) as i_file:
				actual=i_file.readline()
		finally:
			# Remove the scratch file even when writing or reading fails,
			# so a failing run does not leave it behind.
			if os.path.exists(output_name):
				os.remove(output_name)

		self.assertEqual(expected, actual)
Exemplo n.º 2
0
	def __init__ (self, xmap_file_name):
		"""Locate the .xmap file and its companion .cmap files.

		xmap_file_name is split on '/': everything before the last component
		becomes self.workspace ("." when there is no '/'), and the last
		component is the file name. The anchor and query maps are derived from
		that name by replacing ".xmap" with "_r.cmap" and "_q.cmap".
		"""
		file_name_parts=xmap_file_name.split('/')
		if len(file_name_parts)>1:
			self.workspace="/".join(file_name_parts[:-1])
		else:
			self.workspace="."
		# The file objects are built inside the workspace, using bare file names.
		# NOTE(review): CD is presumably a change-directory context manager - confirm.
		with CD(self.workspace):
			file_name=file_name_parts[-1]
			self.xmap=XmapFile(file_name)
			self.anchor_cmap=CmapFile(file_name.replace(".xmap", "_r.cmap"))
			self.query_cmap=CmapFile(file_name.replace(".xmap", "_q.cmap"))

		# Matches one "(<digits>,<digits>)" pair. Raw string so the backslashes
		# reach the regex engine without invalid-escape warnings.
		self.ALIGNED_LABELS=re.compile(r"\(([\d]+),([\d]+)\)")
Exemplo n.º 3
0
	def test_getExtension(self):
		"""XmapFile.getExtension() is expected to return "xmap"."""
		# assertEqual replaces the deprecated assertEquals alias.
		self.assertEqual("xmap", XmapFile.getExtension())
Exemplo n.º 4
0
	def setUp(self):
		"""Create (or truncate) the input file and build the XmapFile under test while it is open."""
		target=self.input_file
		with open(target, "w"):
			self.obj=XmapFile(target)
Exemplo n.º 5
0
	def setUp(self):
		"""Open the fixture files for writing and build the writer objects.

		The three handles are left open on self; nothing in this method
		closes them.
		"""
		# Opening in 'w' mode creates (or truncates) the .xmap file before the writer is built.
		self.xmap=open(self.xmap_name, 'w')
		self.xmap_writer=XmapFile(self.xmap_name)
		self.query_cmap=open(self.query_cmap_name, 'w')
		self.anchor_cmap=open(self.anchor_cmap_name, 'w')
		# A single CmapFile, built on the query map name, is stored as the only cmap writer.
		self.cmap_writer=CmapFile(self.query_cmap_name)
Exemplo n.º 6
0
class tFindBreakpoints(tScripts):
	"""End-to-end tests that run find_breakpoints.py on small generated fixtures.

	Every test builds .xmap/.cmap files in the current directory, runs the
	script as a subprocess and compares [combined stdout/stderr, exit status]
	with the expected pair.
	"""
	maxDiff=None
	xmap_name='file.xmap'
	xmap_writer=None
	xmap=None
	# NOTE(review): the query fixture carries the "_r" suffix and the anchor
	# fixture the "_q" suffix - confirm this naming is intended.
	query_cmap_name='file_r.cmap'
	query_cmap=None
	anchor_cmap_name='file_q.cmap'
	anchor_cmap=None
	cmap_writer=None
	# Script under test, relative to the directory the tests are run from.
	_SCRIPT="../scripts/find_breakpoints.py"

	def setUp(self):
		"""Open the three fixture files for writing and build the writers."""
		self.xmap=open(self.xmap_name, 'w')
		self.xmap_writer=XmapFile(self.xmap_name)
		self.query_cmap=open(self.query_cmap_name, 'w')
		self.anchor_cmap=open(self.anchor_cmap_name, 'w')
		self.cmap_writer=CmapFile(self.query_cmap_name)
	def tearDown(self):
		"""Close the fixture handles (a no-op if already closed) and delete the files."""
		self.xmap.close()
		self.query_cmap.close()
		self.anchor_cmap.close()
		os.remove(self.xmap_name)
		os.remove(self.query_cmap_name)
		os.remove(self.anchor_cmap_name)

	### Helpers ###
	def _run_script(self, *arguments):
		"""Run the script with the given arguments and return [output, return code].

		stderr is merged into stdout. A non-zero exit status is reported through
		the returned pair instead of raising; any other exception propagates.
		"""
		try:
			output=subprocess.check_output([self._SCRIPT]+list(arguments), stderr=subprocess.STDOUT)
			return_code=0
		except subprocess.CalledProcessError as error:
			output=error.output
			return_code=error.returncode
		return [output, return_code]

	def _write_labels(self, label_stop, *cmap_handles):
		"""Write labels 1..label_stop-1 of contig 1 to every handle, then close the handles."""
		for label_position in range(1,label_stop):
			label=Mock(contig_id=1, contig_len=1000, contig_site_count=1000, label_id=label_position, channel="1", position=label_position, stdev=0, coverage=1, occurrences=1)
			for handle in cmap_handles:
				self.cmap_writer.write(label, handle)
		for handle in cmap_handles:
			handle.close()

	def _write_alignment(self, query_end, anchor_end):
		"""Write one '+' alignment of contig 1 onto contig 1 to the .xmap file and close it."""
		alignment=Mock(alignment_id=1, query_id=1, anchor_id=1, query_start=2, query_end=query_end, anchor_start=1, anchor_end=anchor_end, orientation='+', confidence=250,hit_enum="*", query_len=1000, anchor_len=1000, label_channel="1", alignment="*")
		self.xmap_writer.write(alignment, self.xmap)
		self.xmap.close()

	### Bad input ###
	def test_badInput_none(self):
		"""No arguments: expects the usage message and exit status 2."""
		expected_stdout="usage: find_breakpoints.py [-h] xmap_file\nfind_breakpoints.py: error: too few arguments\n"
		expected_returnCode=2
		expecteds=[expected_stdout, expected_returnCode]
		actuals=self._run_script()

		self.assertEqual(expecteds, actuals)

	def test_badInput_notAFile(self):
		"""Non-existent .xmap path: expects a "could not be found" message and exit status 1."""
		expected_stdout="The .xmap alignment file you specified, file.xmap.bed.ext, could not be found\n"
		expected_returnCode=1
		expecteds=[expected_stdout, expected_returnCode]
		actuals=self._run_script(self.xmap_name+".bed.ext")

		self.assertEqual(expecteds, actuals)

	@unittest.skip('not_implemented')
	def test_badInput_notXmap(self):
		pass
	def test_badInput_contigInXmapNotInAnchorCmap(self):
		"""Labels only in the query fixture: expects an error naming the anchor fixture."""
		self._write_labels(1000, self.query_cmap)
		self._write_alignment(query_end=998, anchor_end=1000)
		expected_stdout="The .xmap alignment ("+self.xmap_name+") file contains a contig id (1) that was not found in one of the .cmap contig files ("+self.anchor_cmap_name+")\n"
		expected_returnCode=1
		expecteds=[expected_stdout, expected_returnCode]
		actuals=self._run_script(self.xmap_name)

		self.assertEqual(expecteds, actuals)
	def test_badInput_contigInXmapNotInQueryCmap(self):
		"""Labels only in the anchor fixture: expects an error naming the query fixture."""
		self._write_labels(1000, self.anchor_cmap)
		self._write_alignment(query_end=998, anchor_end=1000)
		expected_stdout="The .xmap alignment ("+self.xmap_name+") file contains a contig id (1) that was not found in one of the .cmap contig files ("+self.query_cmap_name+")\n"
		expected_returnCode=1
		expecteds=[expected_stdout, expected_returnCode]
		actuals=self._run_script(self.xmap_name)

		self.assertEqual(expecteds, actuals)
	def test_badInput_cantFindCmap(self):
		"""Both .cmap fixtures deleted: expects the "could not be found" message and exit status 1."""
		os.remove(self.query_cmap_name)
		os.remove(self.anchor_cmap_name)
		expected_stdout="Some of the .cmap contig files associated with your .xmap alignment file could not be found. More specifically, Unable to find query, anchor maps.\n"
		expected_returnCode=1
		expecteds=[expected_stdout, expected_returnCode]
		try:
			actuals=self._run_script(self.xmap_name)
		finally:
			# tearDown deletes both .cmap files, so recreate them even when
			# the run raised unexpectedly.
			for cmap_name in (self.query_cmap_name, self.anchor_cmap_name):
				with open(cmap_name, 'w'):
					pass

		self.assertEqual(expecteds, actuals)

	### One alignment only ###
	def test_oneAlignment_no5PercentOverhang(self):
		"""Labels 1-999 in both maps, alignment 2-998 / 1-1000: expects no output."""
		self._write_labels(1000, self.query_cmap, self.anchor_cmap)
		self._write_alignment(query_end=998, anchor_end=1000)
		expected_stdout=""
		expected_returnCode=0
		expecteds=[expected_stdout, expected_returnCode]
		actuals=self._run_script(self.xmap_name)

		self.assertEqual(expecteds, actuals)
	def test_oneAlignment_noLabeledOverhang(self):
		"""Labels 1-499 in both maps, alignment ending at 500: expects no output."""
		self._write_labels(500, self.query_cmap, self.anchor_cmap)
		self._write_alignment(query_end=500, anchor_end=500)
		expected_stdout=""
		expected_returnCode=0
		expecteds=[expected_stdout, expected_returnCode]
		actuals=self._run_script(self.xmap_name)

		self.assertEqual(expecteds, actuals)

	def test_oneAlignment_oneSingleLabelOverhang(self):
		"""Labels 1-501 in both maps, alignment ending at 500: expects one 500-501 record."""
		self._write_labels(502, self.query_cmap, self.anchor_cmap)
		self._write_alignment(query_end=500, anchor_end=500)
		expected_stdout="Chr01\t500\t501\t1\t250.0\t+\n"
		expected_returnCode=0
		expecteds=[expected_stdout, expected_returnCode]
		actuals=self._run_script(self.xmap_name)

		self.assertEqual(expecteds, actuals)
	def test_oneAlignment_oneLabeledOverhang(self):
		"""Labels 1-999 in both maps, alignment ending at 500: expects one 500-502 record."""
		self._write_labels(1000, self.query_cmap, self.anchor_cmap)
		self._write_alignment(query_end=500, anchor_end=500)
		expected_stdout="Chr01\t500\t502\t1\t250.0\t+\n"
		expected_returnCode=0
		expecteds=[expected_stdout, expected_returnCode]
		actuals=self._run_script(self.xmap_name)

		self.assertEqual(expecteds, actuals)

	### Multiple alignments ###
	@unittest.skip('not implmemented')
	def test_multipleAligns_noTwoLabelOverhang(self):
		pass
Exemplo n.º 7
0
class AssessReferenceAlignment(object):
	"""Classify the labels of an .xmap alignment against its anchor and query .cmap files.

	The extract* methods re-parse the files on every call and store their
	intermediate and final results as attributes on the instance.
	"""
	def __init__ (self, xmap_file_name):
		"""Locate the .xmap file and its companion .cmap files.

		xmap_file_name is split on '/': everything before the last component
		becomes self.workspace ("." when there is no '/'), and the last
		component is the file name. The anchor and query maps are derived from
		that name by replacing ".xmap" with "_r.cmap" and "_q.cmap".
		"""
		file_name_parts=xmap_file_name.split('/')
		if len(file_name_parts)>1:
			self.workspace="/".join(file_name_parts[:-1])
		else:
			self.workspace="."
		# The file objects are built inside the workspace, using bare file names.
		# NOTE(review): CD is presumably a change-directory context manager - confirm.
		with CD(self.workspace):
			file_name=file_name_parts[-1]
			self.xmap=XmapFile(file_name)
			self.anchor_cmap=CmapFile(file_name.replace(".xmap", "_r.cmap"))
			self.query_cmap=CmapFile(file_name.replace(".xmap", "_q.cmap"))

		# Matches one "(<digits>,<digits>)" pair; group 1 is used as the anchor
		# label id and group 2 as the query label id. Raw string so the
		# backslashes reach the regex engine without invalid-escape warnings.
		self.ALIGNED_LABELS=re.compile(r"\(([\d]+),([\d]+)\)")

	def extractTruePositives(self):
		"""Return {anchor_id: [position, ...]} for anchor labels that appear in an aligned pair."""
		self.true_positive_labels={}
		self.true_positive_locations={}
		for alignment in self.xmap.parse():
			anchor=alignment.anchor_id
			aligned_labels=self.true_positive_labels.setdefault(anchor, set())
			self.true_positive_locations.setdefault(anchor, [])

			for label_pair in self.ALIGNED_LABELS.finditer(alignment.alignment):
				aligned_labels.add(int(label_pair.group(1)))

		# Translate the collected label ids into positions using the anchor map.
		for label in self.anchor_cmap.parse():
			if label.contig_id not in self.true_positive_labels:
				continue
			if label.label_id in self.true_positive_labels[label.contig_id]:
				self.true_positive_locations[label.contig_id].append(label.position)
		return self.true_positive_locations

	def extractFalseNegatives(self):
		"""Return {anchor_id: [position, ...]} for anchor labels skipped between aligned pairs.

		A label counts as a false negative when its id lies strictly between
		the anchor label ids of two consecutive aligned pairs.
		"""
		# false negative labels are present in the anchor, not in the query
		self.false_negative_labels={}
		self.false_negative_locations={}

		for alignment in self.xmap.parse():
			anchor=alignment.anchor_id
			skipped_labels=self.false_negative_labels.setdefault(anchor, set())
			self.false_negative_locations.setdefault(anchor, [])

			previous_label=None
			for label_pair in self.ALIGNED_LABELS.finditer(alignment.alignment):
				anchor_label=int(label_pair.group(1))

				# The first pair only seeds previous_label; there is no gap before it.
				if previous_label is not None:
					skipped_labels.update(range(previous_label+1,anchor_label))
				previous_label=anchor_label

		for label in self.anchor_cmap.parse():
			if label.contig_id not in self.false_negative_labels:
				continue
			if label.label_id in self.false_negative_labels[label.contig_id]:
				self.false_negative_locations[label.contig_id].append(label.position)
		return self.false_negative_locations

	def extractFalsePositives(self):
		"""Return {anchor_id: [position, ...]} for query labels skipped between aligned pairs.

		Works in three passes:
		1. every query label id strictly between the query labels of two
		   consecutive aligned pairs is recorded, together with the anchor id
		   and the label ids of the earlier pair;
		2. over the query map, each recorded label gets an offset: its position
		   minus the position of the most recent label that was not recorded;
		3. over the anchor map, each offset is added to the position of the
		   anchor label of the earlier pair.
		"""
		self.false_positive_labels={}
		for alignment in self.xmap.parse():
			anchor=alignment.anchor_id
			query=alignment.query_id
			if query not in self.false_positive_labels:
				self.false_positive_labels[query]={}

			previous_label_pair=None
			for label_pair in self.ALIGNED_LABELS.finditer(alignment.alignment):
				if previous_label_pair is None:
					previous_label_pair=label_pair
					continue

				previous_query_label=int(previous_label_pair.group(2))
				query_label=int(label_pair.group(2))

				# The two branches swap the bounds so the range always ascends.
				if alignment.orientation=="+":
					start=previous_query_label+1
					stop=query_label
				else:
					start=query_label+1
					stop=previous_query_label
				for i in range(start, stop):
					self.false_positive_labels[query][i]={"anchor_id": anchor, "anchor_last_true_positive": int(previous_label_pair.group(1)), "query_last_true_positive": int(previous_label_pair.group(2))}

				previous_label_pair=label_pair

		false_positive_offsets={}
		last_true_positive=None
		for label in self.query_cmap.parse():
			if label.contig_id not in self.false_positive_labels:
				last_true_positive=label
				continue
			if label.label_id not in self.false_positive_labels[label.contig_id]:
				last_true_positive=label
				continue

			false_positive=self.false_positive_labels[label.contig_id][label.label_id]
			anchor=false_positive["anchor_id"]
			anchor_label=false_positive["anchor_last_true_positive"]
			offsets=false_positive_offsets.setdefault(anchor, {}).setdefault(anchor_label, [])

			# NOTE(review): assumes a non-recorded label was seen before the first
			# recorded one; otherwise last_true_positive is still None - confirm.
			offsets.append(label.position-last_true_positive.position)

		self.false_positive_locations={}
		for label in self.anchor_cmap.parse():
			if label.contig_id not in false_positive_offsets:
				continue
			if label.label_id not in false_positive_offsets[label.contig_id]:
				continue

			locations=self.false_positive_locations.setdefault(label.contig_id, [])
			for offset in false_positive_offsets[label.contig_id][label.label_id]:
				locations.append(label.position+offset)

		return self.false_positive_locations

	def extractPartialMatches(self, output_name='partial_matches.xmap'):
		"""Collect alignments that cover less than 90% of their query.

		Each such alignment is written to output_name through self.xmap.write
		and its (anchor_start, anchor_end) tuple is recorded under its anchor id.

		Returns {anchor_id: [(anchor_start, anchor_end), ...]}.
		"""
		self.partial_match_locations={}
		with open(output_name, 'w') as o_file:
			for align in self.xmap.parse():
				proportion=abs(align.query_start-align.query_end)/float(align.query_len)
				if proportion < 0.9:
					# Alignments expose anchor_id (as used by the other extract* methods).
					anchor=align.anchor_id
					if anchor not in self.partial_match_locations:
						self.partial_match_locations[anchor]=[]
					# list.append takes one argument, so the span is stored as a tuple.
					self.partial_match_locations[anchor].append((align.anchor_start, align.anchor_end))
					self.xmap.write(align, o_file)
		return self.partial_match_locations

	def extractSequenceContexts(self, loci):
		"""Not implemented; currently does nothing and returns None."""
		pass
	def processSeqeuenceContexts(self, fasta_file, motif):
		"""Print one "HasGap<TAB>HasSNV" row of 0/1 flags per FASTA record.

		HasGap is 1 when the record contains a run of seven N (all upper or
		all lower case). HasSNV is 1 when it contains any variant of motif
		that differs from it by one substituted base (A, T, C or G).
		"""
		snvs=set()
		for i in range(len(motif)):
			for base in ['A', 'T', 'C', 'G']:
				if base==motif[i]:
					continue
				snvs.add(motif[:i]+base+motif[i+1:])

		print("HasGap	HasSNV")
		for record in SeqIO.parse(fasta_file, 'fasta'):
			output="0"
			if "NNNNNNN" in record.seq or "nnnnnnn" in record.seq:
				output="1"

			if any(snv in record.seq for snv in snvs):
				output+="\t1"
			else:
				output+="\t0"
			print(output)


	def findNearestNeighbors(self,loci,neighbor_locis):
		"""Return, per chromosome, the distance from each locus to its closest neighbor.

		loci maps chromosome -> iterable of positions. neighbor_locis is an
		iterable of such mappings, all searched together. A locus with no
		neighbor on its chromosome contributes no entry, so the result lists
		may be shorter than the input lists.
		"""
		neighbors={}
		for chrom in loci:
			distances=neighbors.setdefault(chrom, [])
			for locus in loci[chrom]:
				nearest_dist=None
				for neighbor_loci in neighbor_locis:
					if chrom not in neighbor_loci:
						continue
					for neighbor_locus in neighbor_loci[chrom]:
						dist=abs(locus-neighbor_locus)
						if nearest_dist is None or dist<nearest_dist:
							nearest_dist=dist
				if nearest_dist is not None:
					distances.append(nearest_dist)
		return neighbors

	def findLabelsWithNearNeighbors(self,loci,neighbor_locis,threshold=301):
		"""Count loci whose nearest neighbor is strictly closer than threshold."""
		nearest_neighbors=self.findNearestNeighbors(loci, neighbor_locis)
		offending_count=0
		for chrom in nearest_neighbors:
			for distance in nearest_neighbors[chrom]:
				if distance < threshold:
					offending_count+=1
		return offending_count