def read_input(args):
    '''
    Handle input to scaffolding from three different files and build the graph.

    1. A contig fasta file (args.contigs).
    2. A contigs to subsequence file (args.seqs, made when parsing bam file).
    3. A link file that describes all linkings of contigs (args.links, made
       when parsing bam file).

    Arguments:
        args -- object exposing the attributes ``contigs``, ``seqs`` and
                ``links`` (paths opened for reading) and ``overlap`` (numeric;
                gaps smaller than ``-args.overlap`` are reset to 0).

    Returns:
        The graph ``G`` holding one node per subsequence and one edge per link
        with at least 5 supporting links, after ``G.remove_self_links()``.

    Raises:
        KeyError -- if a subsequence refers to a contig name that is missing
                    from the contig file, or an accepted link refers to a
                    subsequence name that is missing from the subsequence file.
    '''
    # Minimum link count for a link to become an edge.
    #TODO: Add threshold parameter
    min_links = 5

    # Every input file is opened in a ``with`` block that spans the loop
    # consuming it, so the handle is closed even if parsing raises.
    contigs = {}
    with open(args.contigs, 'r') as contig_file:
        for accession, sequence in input_.get_contigs(contig_file):
            c = Contig(accession, sequence=sequence)
            # Keyed by the name the Contig object reports, not the raw accession.
            contigs[c.name] = c

    subsequences = {}
    with open(args.seqs, 'r') as seq_file:
        for subseq_name, contig, start_pos, end_pos in input_.get_subsequences(seq_file):
            subsequences[subseq_name] = SubSequence(
                subseq_name, contigs[contig], start_pos, end_pos)

    G = g()

    ##
    # Initialize nodes
    for subseq in subsequences.values():
        G.add_node(subseq)

    ##
    # Create edges
    with open(args.links, 'r') as link_file:
        for (seq1, orientation1, seq2, orientation2, link_count, _gap), naive_gap in input_.get_links(link_file):
            #TODO: Calculate distance here
            if link_count < min_links:
                continue
            # A gap more negative than the allowed overlap is replaced by 0.
            if naive_gap < -args.overlap:
                naive_gap = 0
            G.add_edge((subsequences[seq1], orientation1),
                       (subsequences[seq2], orientation2),
                       d=naive_gap,
                       s=score.nr_links(link_count))

    G.remove_self_links()
    return G
# Build a small hand-made graph: two subsequences on c1 and two on c2.
# NOTE(review): c1, c2, ss, g and score come from outside this section;
# the numeric arguments are presumably start/end positions -- confirm.
s1 = ss('s1', c1, 0, 3)
s2 = ss('s2', c1, 3, 6)
s3 = ss('s3', c2, 0, 3)
s4 = ss('s4', c2, 3, 6)

# Single pre-formatted string so the output ("<len> <len>") is the same
# under the Python 2 print statement and the Python 3 print function.
print("%s %s" % (len(s1), len(s2)))

# scaf = Scaffold('scaf1')
#
# scaf.add_subsequence(s2,False,0)
# scaf.add_subsequence(s1,False,7)
print(s1)
print(s2)
# print(scaf)

G = g()

# One node per subsequence.
G.add_node(s1)
G.add_node(s2)
G.add_node(s3)
G.add_node(s4)

# Edges join (subsequence, orientation flag) pairs; d is 0 throughout and
# s is derived from a link count via score.nr_links.
G.add_edge((s1, True), (s4, True), d=0, s=score.nr_links(10))
G.add_edge((s2, True), (s3, True), d=0, s=score.nr_links(12))
G.add_edge((s4, False), (s2, False), d=0, s=score.nr_links(7))

# false link!
# (this edge connects s1 to itself)
G.add_edge((s1, True), (s1, False), d=0, s=score.nr_links(5))
# G.add_edge()