示例#1
0
def read_input(args):
	'''
	Build the scaffolding graph from three different input files.

	1. A contig fasta file (args.contigs).
	2. A contigs to subsequence file (args.seqs, made when parsing bam file).
	3. A link file that describes all linkings of contigs (args.links,
	   made when parsing bam file).

	Every file is opened in a ``with`` block, so its handle is closed once
	the file has been consumed, also when parsing raises.

	args must provide the attributes ``contigs``, ``seqs`` and ``links``
	(paths handed to ``open``) and ``overlap`` (compared against the
	naive gap of each link).

	Returns the graph with one node per subsequence and one edge per link
	that has at least 5 supporting links, after ``remove_self_links()`` has
	been called on it.

	Raises KeyError if a subsequence names a contig that is not in the
	contig table, or if a kept link names an unknown subsequence.
	'''
	# Contigs are keyed by the name the Contig object reports.
	contigs = {}
	with open(args.contigs, 'r') as contig_file:
		for accession, sequence in input_.get_contigs(contig_file):
			c = Contig(accession, sequence = sequence)
			contigs[c.name] = c

	subsequences = {}
	with open(args.seqs, 'r') as seq_file:
		for subseq_name, contig, start_pos, end_pos in input_.get_subsequences(seq_file):
			subsequences[subseq_name] = SubSequence(subseq_name, contigs[contig], start_pos, end_pos)

	G = g()

	##
	# Initialize nodes
	for subseq in subsequences.values():
		G.add_node(subseq)

	##
	# Create edges

	#TODO: Add threshold parameter
	min_links = 5
	with open(args.links, 'r') as link_file:
		for (seq1, orientation1, seq2, orientation2, link_count, gap), naive_gap in input_.get_links(link_file):
			#TODO: Calculate distance here
			if link_count < min_links:
				continue
			# A gap more negative than the allowed overlap is reset to 0.
			if naive_gap < - args.overlap:
				naive_gap = 0
			G.add_edge(
				(subsequences[seq1], orientation1),
				(subsequences[seq2], orientation2),
				d=naive_gap,
				s=score.nr_links(link_count))

	G.remove_self_links()

	return(G)
示例#2
0
# Four subsequences, two built on c1 and two on c2.
# NOTE(review): the arguments look like (name, contig, start, end) - confirm
# against the definition of ss.
s1 = ss('s1',c1,0,3)
s2 = ss('s2',c1,3,6)
s3 = ss('s3',c2,0,3)
s4 = ss('s4',c2,3,6)
# Formatted explicitly so the same "<len> <len>" line is printed whether this
# runs under Python 2 or Python 3 (the bare print statement is Python 2 only).
print('%s %s' % (len(s1), len(s2)))


# scaf = Scaffold('scaf1')
# # scaf.add_subsequence(s2,False,0)
# scaf.add_subsequence(s1,False,7)
print(s1)
print(s2)
# print(scaf)


G = g()

G.add_node(s1)
G.add_node(s2)
G.add_node(s3)
G.add_node(s4)

# Each edge joins two (subsequence, flag) pairs with d=0 and a score from
# score.nr_links(<count>).
G.add_edge((s1,True),(s4,True),d=0,s=score.nr_links(10))
G.add_edge((s2,True),(s3,True),d=0,s=score.nr_links(12))
G.add_edge((s4,False),(s2,False),d=0,s=score.nr_links(7))


#  false link! Both endpoints of this edge are s1.
G.add_edge((s1,True),(s1,False),d=0,s=score.nr_links(5))

# G.add_edge()