def test_keep_three_cycle_two_equal_weights(self):
    """Three equally weighted neighbour pairs linking Contig1, Contig2 and Contig3 must all survive refinement."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    edges = [
        ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),
        ('gene4', 'gene5'), ('gene6', 'gene7'), ('gene7', 'gene8'),
        ('gene8', 'gene1'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(edges)

    neighbours = [
        [('Contig1', 'Contig2'), 1, ['gene3', 'gene4']],
        [('Contig2', 'Contig3'), 1, ['gene5', 'gene6']],
        [('Contig1', 'Contig3'), 1, ['gene8', 'gene1']],
    ]

    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    # The same graph is passed for both graph arguments.
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})

    # Set the contig -> genes and gene -> contig tables directly on the object.
    refiner.contigs = {
        'Contig1': {'gene1': None, 'gene2': None, 'gene3': None},
        'Contig2': {'gene4': None, 'gene5': None},
        'Contig3': {'gene6': None, 'gene7': None, 'gene8': None},
    }
    refiner.genes = {
        'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
        'gene4': 'Contig2', 'gene5': 'Contig2', 'gene6': 'Contig3',
        'gene7': 'Contig3', 'gene8': 'Contig3',
    }

    refined = refiner.refine_contig_neighbours()
    self.assertEqual(sorted(refined), sorted(neighbours))
 def test_one_gene_on_contig(self):
     """Check ends_of_contigs() for the Contig2/Contig4 pair linked through gene5 and gene10."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_10_blast_hits_file'
     sequences = {'Contig2': ['', '', 800], 'Contig4': ['', '', 50]}
     expected_ends = {
         'Contig4': {'Contig2': [5, 50]},
         'Contig2': {'Contig4': [301, 2]},
     }

     graph = nx.Graph()
     graph.add_edges_from([('gene4', 'gene5'), ('gene5', 'gene10'), ('gene10', 'geneA')])
     neighbours = [[('Contig2', 'Contig4'), 1, ['gene5', 'gene10']]]

     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)

     self.assertDictEqual(refiner.ends_of_contigs(), expected_ends)
Exemplo n.º 3
0
    def produce_ordered_contig_graph(self):
        """Build and return the ordered contig graph.

        Expands all contigs with ContigSearching, refines the resulting
        neighbour list with RefineContigNeighbours (storing the contig ends
        on ``self.contig_ends``), builds a contig subgraph from both the
        refined and the unrefined neighbour lists, and returns the result of
        IterateJoiningContigComponents.iterate_joining.
        """
        searcher = ContigSearching(self.gene_detector, self.filtered_graph)
        searcher.expand_all_contigs()

        refiner = RefineContigNeighbours(
            searcher.neighbouring_contigs,
            self.filtered_graph,
            self.unfiltered_graph,
            self.filtered_blast_hits_file,
            self.gene_detector,
            self.sequences,
        )
        # Refinement runs before the contig ends are read.
        refiner.refine_contig_neighbours()
        self.contig_ends = refiner.ends_of_contigs()

        refined_contigs = ContigGraph(refiner.refined_neighbouring_contigs)
        unrefined_contigs = ContigGraph(searcher.neighbouring_contigs)
        refined_subgraph = refined_contigs.create_contig_subgraph()
        unrefined_subgraph = unrefined_contigs.create_contig_subgraph()

        # The joiner is seeded with the unrefined subgraph and fed the refined one.
        joiner = IterateJoiningContigComponents(unrefined_subgraph)
        return joiner.iterate_joining(refined_subgraph)
 def test_finding_contig_ends_gene_degree_one(self):
     """Check ends_of_contigs() for the Contig3/Contig2 pair joined through gene7 and gene4."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     sequences = {'Contig2': ['', '', 800], 'Contig3': ['', '', 2000]}
     expected_ends = {
         'Contig3': {'Contig2': [250, 700]},
         'Contig2': {'Contig3': [2, 301]},
     }

     # gene6 hangs off gene7 as a node of degree one.
     edges = [('gene9', 'gene8'), ('gene8', 'gene7'), ('gene7', 'gene6'),
              ('gene7', 'gene4'), ('gene4', 'gene5')]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [[('Contig3', 'Contig2'), 1, ['gene7', 'gene4']]]

     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)

     self.assertDictEqual(refiner.ends_of_contigs(), expected_ends)
 def test_finding_contig_ends_gene_degree_one(self):
     """ends_of_contigs() must return the expected ends for Contig3 and Contig2 (neighbours via gene7/gene4)."""
     assembly = 'madansi/tests/data/assembly_4_sequences.fa'
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

     graph = nx.Graph()
     graph.add_edges_from([
         ('gene9', 'gene8'),
         ('gene8', 'gene7'),
         ('gene7', 'gene6'),
         ('gene7', 'gene4'),
         ('gene4', 'gene5'),
     ])
     neighbours = [[('Contig3', 'Contig2'), 1, ['gene7', 'gene4']]]
     detector = GeneDetector(assembly, blast_hits)
     refiner = RefineContigNeighbours(
         neighbours, graph, graph, blast_hits, detector,
         {'Contig2': ['', '', 800], 'Contig3': ['', '', 2000]},
     )

     ends = refiner.ends_of_contigs()
     self.assertDictEqual(ends, {
         'Contig3': {'Contig2': [250, 700]},
         'Contig2': {'Contig3': [2, 301]},
     })
 def test_most_occurent_contig_not_in_neighbours(self):
     """The single Contig1/Contig2 neighbour entry (via gene2/gene4) is expected to be removed by refinement."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     edges = [
         ('gene1', 'gene2'), ('gene2', 'gene4'), ('gene4', 'gene5'),
         ('gene2', 'gene6'), ('gene6', 'gene7'), ('gene7', 'gene8'),
         ('gene4', 'gene8'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [[('Contig1', 'Contig2'), 1, ['gene2', 'gene4']]]

     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})

     refined = refiner.refine_contig_neighbours()
     self.assertEqual(sorted(refined), [])
 def test_one_gene_on_contig(self):
     """ends_of_contigs() must give the expected ends for Contig2 and Contig4 (neighbours via gene5/gene10)."""
     assembly = 'madansi/tests/data/assembly_4_sequences.fa'
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_10_blast_hits_file'

     graph = nx.Graph()
     graph.add_edges_from([('gene4', 'gene5'), ('gene5', 'gene10'), ('gene10', 'geneA')])
     neighbours = [[('Contig2', 'Contig4'), 1, ['gene5', 'gene10']]]
     detector = GeneDetector(assembly, blast_hits)
     sequences = {'Contig2': ['', '', 800], 'Contig4': ['', '', 50]}
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)

     ends = refiner.ends_of_contigs()
     expected_ends = {'Contig4': {'Contig2': [5, 50]}, 'Contig2': {'Contig4': [301, 2]}}
     self.assertDictEqual(ends, expected_ends)
 def test_finding_contig_ends_3_cycle_both_different_half(self):
     """Check ends_of_contigs() for Contig2/Contig3 when the graph has the 3-cycle geneA-gene8-gene6."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     sequences = {'Contig2': ['', '', 800], 'Contig3': ['', '', 1200]}
     expected_ends = {
         'Contig3': {'Contig2': [250, 1200]},
         'Contig2': {'Contig3': [301, 2]},
     }

     edges = [
         ('gene4', 'gene5'), ('gene5', 'geneA'), ('geneA', 'gene8'),
         ('geneA', 'gene6'), ('gene6', 'gene8'), ('gene6', 'gene7'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [[('Contig2', 'Contig3'), 1, ['geneA']]]

     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)

     self.assertDictEqual(refiner.ends_of_contigs(), expected_ends)
 def test_contig_appearances(self):
     """find_contig_appearances() on the Contig1/Contig2 entry must return the expected per-contig counts and indexed gene lists."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     expected = {
         'Contig1': [3, {0: ['gene3'], 1: ['gene2'], 2: ['gene1']}],
         'Contig2': [2, {0: ['gene4'], 1: ['gene5']}],
     }

     # Simple path gene1 - gene2 - gene3 - gene4 - gene5.
     graph = nx.Graph()
     graph.add_edges_from([('gene1', 'gene2'), ('gene2', 'gene3'),
                           ('gene3', 'gene4'), ('gene4', 'gene5')])
     neighbours = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene4']]]

     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})

     appearances = refiner.find_contig_appearances(neighbours[0])
     self.assertDictEqual(appearances, expected)
 def test_loop_of_genes_between_contigs(self):
     """With three neighbour entries, refinement is expected to drop Contig1/Contig2 and keep the two entries involving Contig3."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     edges = [
         ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'geneA'),
         ('geneA', 'geneB'), ('geneB', 'gene4'), ('gene4', 'gene5'),
         ('gene3', 'gene6'), ('gene6', 'gene7'), ('gene7', 'gene8'),
         ('gene8', 'gene9'), ('gene9', 'geneB'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)

     neighbours = [
         [('Contig1', 'Contig2'), 2, ['geneA', 'geneB']],
         [('Contig2', 'Contig3'), 1, ['geneB']],
         [('Contig1', 'Contig3'), 1, ['geneA']],
     ]
     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})

     kept = [
         [('Contig2', 'Contig3'), 1, ['geneB']],
         [('Contig1', 'Contig3'), 1, ['geneA']],
     ]
     refined = refiner.refine_contig_neighbours()
     self.assertEqual(sorted(refined), sorted(kept))
 def test_finding_contig_ends_multiple_genes_same_side_of_closer_gene(self):
     """Check the contig ends computed for the Contig1/Contig2 pair.

     After ends_of_contigs() has run, ``contig_ends`` must equal one of two
     accepted dictionaries, which differ only in the second value recorded
     for Contig1 towards Contig2 (3 or 490).
     """
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     sequences = {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800]}
     acceptable_ends = [
         {'Contig1': {'Contig2': [980, 3]}, 'Contig2': {'Contig1': [2, 301]}},
         {'Contig1': {'Contig2': [980, 490]}, 'Contig2': {'Contig1': [2, 301]}},
     ]

     filtered_graph = nx.Graph()
     filtered_graph.add_edges_from([('gene1', 'gene2'), ('gene3', 'gene2'),
                                    ('gene3', 'gene4'), ('gene4', 'gene5')])
     neighbouring_contigs = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene4']]]

     gene_detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refine_contig_neighbours_object = RefineContigNeighbours(
         neighbouring_contigs, filtered_graph, filtered_graph,
         blast_hits, gene_detector, sequences)
     refine_contig_neighbours_object.ends_of_contigs()

     # assertIn shows the actual contig_ends on failure, whereas
     # assertTrue(a == x or a == y) only reports "False is not true".
     self.assertIn(refine_contig_neighbours_object.contig_ends, acceptable_ends)
 def test_add_to_contig_appearances(self):
     """add_to_contig_appearance('gene1', ...) must bump Contig1's count and file gene1 under the given index."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     sequences = {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800], 'Contig3': ['', '', 2000]}

     graph = nx.Graph()
     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     neighbours = []
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)

     # Each case: (appearances passed in, index argument, expected result).
     cases = [
         ({}, 0,
          {'Contig1': [1, {0: ['gene1']}]}),
         ({'Contig1': [0, {}]}, 1,
          {'Contig1': [1, {1: ['gene1']}]}),
         ({'Contig1': [1, {0: ['gene2']}]}, 1,
          {'Contig1': [2, {0: ['gene2'], 1: ['gene1']}]}),
         ({'Contig1': [2, {1: ['gene2'], 2: ['gene3']}]}, 2,
          {'Contig1': [3, {1: ['gene2'], 2: ['gene1', 'gene3']}]}),
     ]
     for appearances, index, expected in cases:
         self.assertDictEqual(
             refiner.add_to_contig_appearance('gene1', appearances, index),
             expected)
 def test_keep_all_connections_empty_list(self):
     """An empty graph with an empty neighbour list must yield an empty refined neighbour list."""
     empty_hits = 'madansi/tests/data/empty_file'

     graph = nx.Graph()
     neighbours = []
     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', empty_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, empty_hits, detector, {})
     refiner.genes = {}

     refiner.refine_contig_neighbours()
     refined = refiner.refined_neighbouring_contigs
     self.assertEqual(sorted(refined), sorted(neighbours))
 def test_single_gene_from_a_contig(self):
     """Contig2's gene3 sits next to Contig1 while gene4/gene5 are in a separate component; the pair is expected to be dropped."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_5_blast_hits_file'

     graph = nx.Graph()
     graph.add_edges_from([('gene1', 'gene2'), ('gene2', 'gene3'), ('gene4', 'gene5')])
     neighbours = [[('Contig1', 'Contig2'), 1, ['gene2', 'gene3']]]

     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})
     # Set the gene -> contig table directly on the object.
     refiner.genes = {
         'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig2',
         'gene4': 'Contig2', 'gene5': 'Contig2',
     }

     refiner.refine_contig_neighbours()
     refined = refiner.refined_neighbouring_contigs
     self.assertEqual(sorted(refined), [])
 def test_orientation_of_contigs(self):
     """ends_of_contigs() must fill contig_ends for the chain Contig1 - Contig2 - Contig3."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     sequences = {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800], 'Contig3': ['', '', 2000]}
     expected_ends = {
         'Contig1': {'Contig2': [980, 490]},
         'Contig2': {'Contig1': [2, 301], 'Contig3': [301, 2]},
         'Contig3': {'Contig2': [1, 250]},
     }

     # Simple path gene1 - gene2 - ... - gene8.
     edges = [
         ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),
         ('gene4', 'gene5'), ('gene5', 'gene6'), ('gene6', 'gene7'),
         ('gene7', 'gene8'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [
         [('Contig1', 'Contig2'), 1, ['gene3', 'gene4']],
         [('Contig2', 'Contig3'), 1, ['gene5', 'gene6']],
     ]

     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)
     refiner.genes = {
         'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
         'gene4': 'Contig2', 'gene5': 'Contig2', 'gene6': 'Contig3',
         'gene7': 'Contig3', 'gene8': 'Contig3',
     }

     refiner.ends_of_contigs()
     self.assertDictEqual(refiner.contig_ends, expected_ends)
 def test_keep_one_connection(self):
     """A single Contig1/Contig2 neighbour entry must still be present after refinement."""
     empty_hits = 'madansi/tests/data/empty_file'
     edges = [
         ('gene1', 'gene2'), ('gene2', 'gene5'), ('gene5', 'gene6'),
         ('gene6', 'gene3'), ('gene3', 'gene4'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [[('Contig1', 'Contig2'), 2, ['gene5', 'gene6']]]

     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', empty_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, empty_hits, detector, {})
     # gene5 and gene6 are deliberately absent from the gene -> contig table.
     refiner.genes = {
         'gene1': 'Contig1', 'gene2': 'Contig1',
         'gene3': 'Contig2', 'gene4': 'Contig2',
     }

     refiner.refine_contig_neighbours()
     refined = refiner.refined_neighbouring_contigs
     self.assertEqual(sorted(refined), sorted(neighbours))
 def test_finds_orientation_two_contigs(self):
     """ends_of_contigs() must fill contig_ends for Contig1 and Contig2 separated by geneA and geneB."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     sequences = {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800]}

     edges = [
         ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'geneA'),
         ('geneA', 'geneB'), ('geneB', 'gene4'), ('gene4', 'gene5'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [[('Contig1', 'Contig2'), 2, ['geneA', 'geneB']]]

     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)
     # Set the contig -> genes and gene -> contig tables directly on the object.
     refiner.contigs = {
         'Contig1': {'gene1': None, 'gene2': None, 'gene3': None},
         'Contig2': {'gene4': None, 'gene5': None},
     }
     refiner.genes = {
         'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
         'gene4': 'Contig2', 'gene5': 'Contig2',
     }

     wanted = {'Contig1': {'Contig2': [980, 490]}, 'Contig2': {'Contig1': [2, 301]}}
     refiner.ends_of_contigs()
     self.assertDictEqual(refiner.contig_ends, wanted)
 def test_contig_joins_in_middle(self):
     """Contig2 (gene7, gene8) attaches at gene3, in the middle of Contig1's gene1..gene6 path; the pair is expected to be dropped."""
     empty_hits = 'madansi/tests/data/empty_file'
     edges = [
         ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),
         ('gene4', 'gene5'), ('gene5', 'gene6'), ('gene3', 'gene7'),
         ('gene7', 'gene8'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene7']]]

     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', empty_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, empty_hits, detector, {})
     refiner.genes = {
         'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1', 'gene4': 'Contig1',
         'gene5': 'Contig1', 'gene6': 'Contig1', 'gene7': 'Contig2', 'gene8': 'Contig2',
     }

     refined = refiner.refine_contig_neighbours()
     self.assertEqual(sorted(refined), [])
 def test_three_contigs_together(self):
     """Three contigs all meet at gene7; every neighbour entry is expected to be removed by refinement."""
     empty_hits = 'madansi/tests/data/empty_file'
     edges = [
         ('gene1', 'gene2'), ('gene3', 'gene4'), ('gene5', 'gene6'),
         ('gene7', 'gene2'), ('gene7', 'gene3'), ('gene7', 'gene5'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)

     neighbours = [
         [('Contig1', 'Contig2'), 1, ['gene7']],
         [('Contig2', 'Contig3'), 1, ['gene7']],
         [('Contig1', 'Contig3'), 1, ['gene7']],
     ]

     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', empty_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, empty_hits, detector, {})
     # gene7 itself is not assigned to any contig.
     refiner.genes = {
         'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig2',
         'gene4': 'Contig2', 'gene5': 'Contig3', 'gene6': 'Contig3',
     }

     refiner.refine_contig_neighbours()
     refined = refiner.refined_neighbouring_contigs
     self.assertEqual(sorted(refined), [])
Exemplo n.º 20
0
 def produce_ordered_contig_graph(self):
     """Return the ordered contig graph built from the filtered gene graph.

     Steps: expand all contigs with ContigSearching; refine the neighbour
     list with RefineContigNeighbours and store its contig ends on
     ``self.contig_ends``; build contig subgraphs from the refined and the
     unrefined neighbour lists; pass both to IterateJoiningContigComponents
     and return the outcome of iterate_joining.
     """
     searcher = ContigSearching(self.gene_detector, self.filtered_graph)
     searcher.expand_all_contigs()

     refiner = RefineContigNeighbours(
         searcher.neighbouring_contigs,
         self.filtered_graph,
         self.unfiltered_graph,
         self.filtered_blast_hits_file,
         self.gene_detector,
         self.sequences,
     )
     # Refinement runs before the contig ends are read.
     refiner.refine_contig_neighbours()
     self.contig_ends = refiner.ends_of_contigs()

     refined_contigs = ContigGraph(refiner.refined_neighbouring_contigs)
     unrefined_contigs = ContigGraph(searcher.neighbouring_contigs)
     refined_subgraph = refined_contigs.create_contig_subgraph()
     unrefined_subgraph = unrefined_contigs.create_contig_subgraph()

     # The joiner is seeded with the unrefined subgraph and fed the refined one.
     joiner = IterateJoiningContigComponents(unrefined_subgraph)
     return joiner.iterate_joining(refined_subgraph)
 def test_keep_two_connections(self):
     """Both neighbour entries along the gene1..gene13 path must still be present after refinement."""
     blast_hits = 'madansi/tests/data/blast_hits_13'
     edges = [
         ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),
         ('gene4', 'gene5'), ('gene5', 'gene6'), ('gene6', 'gene7'),
         ('gene7', 'gene8'), ('gene8', 'gene9'), ('gene9', 'gene10'),
         ('gene10', 'gene11'), ('gene11', 'gene12'), ('gene12', 'gene13'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)

     neighbours = [
         [('Contig1', 'Contig2'), 1, ['gene4']],
         [('Contig2', 'Contig3'), 2, ['gene8', 'gene9']],
     ]
     detector = GeneDetector('madansi/tests/data/assembly_7_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})
     # gene4, gene8 and gene9 are left out of the gene -> contig table.
     refiner.genes = {
         'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
         'gene5': 'Contig2', 'gene6': 'Contig2', 'gene7': 'Contig2',
         'gene10': 'Contig3', 'gene11': 'Contig3', 'gene13': 'Contig3',
         'gene12': 'Contig3',
     }

     refiner.refine_contig_neighbours()
     refined = refiner.refined_neighbouring_contigs
     self.assertEqual(sorted(refined), sorted(neighbours))
 def test_finding_contig_ends_multiple_genes_same_side_of_closer_gene(self):
     """Check the contig ends computed for the Contig1/Contig2 pair.

     Once ends_of_contigs() has run, ``contig_ends`` must match one of two
     accepted dictionaries; they differ only in the second value stored for
     Contig1 towards Contig2 (3 or 490).
     """
     filtered_graph = nx.Graph()
     filtered_graph.add_edges_from([('gene1', 'gene2'), ('gene3', 'gene2'),
                                    ('gene3', 'gene4'), ('gene4', 'gene5')])
     neighbouring_contigs = [[('Contig1', 'Contig2'), 1, ['gene3',
                                                          'gene4']]]
     gene_detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa',
         'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file')
     refine_contig_neighbours_object = RefineContigNeighbours(
         neighbouring_contigs, filtered_graph, filtered_graph,
         'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file',
         gene_detector, {
             'Contig1': ['', '', 1000],
             'Contig2': ['', '', 800]
         })
     refine_contig_neighbours_object.ends_of_contigs()

     acceptable_ends = [
         {'Contig1': {'Contig2': [980, 3]}, 'Contig2': {'Contig1': [2, 301]}},
         {'Contig1': {'Contig2': [980, 490]}, 'Contig2': {'Contig1': [2, 301]}},
     ]
     # assertIn prints the actual contig_ends when the check fails;
     # assertTrue(a == x or a == y) would only say "False is not true".
     self.assertIn(refine_contig_neighbours_object.contig_ends, acceptable_ends)
 def test_finds_orientation_two_contigs(self):
     """contig_ends must hold the expected ends for Contig1 and Contig2, which are separated by geneA and geneB."""
     assembly = 'madansi/tests/data/assembly_4_sequences.fa'
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

     graph = nx.Graph()
     graph.add_edges_from([
         ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'geneA'),
         ('geneA', 'geneB'), ('geneB', 'gene4'), ('gene4', 'gene5'),
     ])
     neighbours = [[('Contig1', 'Contig2'), 2, ['geneA', 'geneB']]]
     detector = GeneDetector(assembly, blast_hits)
     sequences = {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800]}
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)

     # Set the contig -> genes and gene -> contig tables directly on the object.
     refiner.contigs = {
         'Contig1': {'gene1': None, 'gene2': None, 'gene3': None},
         'Contig2': {'gene4': None, 'gene5': None},
     }
     refiner.genes = {
         'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
         'gene4': 'Contig2', 'gene5': 'Contig2',
     }

     wanted = {'Contig1': {'Contig2': [980, 490]}, 'Contig2': {'Contig1': [2, 301]}}
     refiner.ends_of_contigs()
     self.assertDictEqual(refiner.contig_ends, wanted)
 def test_short_contig_in_middle(self):
     """Contig2 holds only gene4 and gene5 between Contig1 and Contig3; the Contig1/Contig3 entry (value 2) is expected to be dropped and the two value-1 entries kept."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     edges = [
         ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),
         ('gene4', 'gene5'), ('gene5', 'gene6'), ('gene6', 'gene7'),
         ('gene7', 'gene8'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)

     neighbours = [
         [('Contig1', 'Contig2'), 1, ['gene3', 'gene4']],
         [('Contig2', 'Contig3'), 1, ['gene5', 'gene6']],
         [('Contig1', 'Contig3'), 2, ['gene4', 'gene5']],
     ]

     detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})

     # Set the contig -> genes and gene -> contig tables directly on the object.
     refiner.contigs = {
         'Contig1': {'gene1': None, 'gene2': None, 'gene3': None},
         'Contig2': {'gene4': None, 'gene5': None},
         'Contig3': {'gene6': None, 'gene7': None, 'gene8': None},
     }
     refiner.genes = {
         'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
         'gene4': 'Contig2', 'gene5': 'Contig2', 'gene6': 'Contig3',
         'gene7': 'Contig3', 'gene8': 'Contig3',
     }

     refined = refiner.refine_contig_neighbours()
     kept = [
         [('Contig1', 'Contig2'), 1, ['gene3', 'gene4']],
         [('Contig2', 'Contig3'), 1, ['gene5', 'gene6']],
     ]
     self.assertEqual(sorted(refined), sorted(kept))
 def test_orientation_further_separation(self):
     """contig_ends must hold the expected ends for three contigs separated by geneA/geneB and geneC, which map to no contig."""
     assembly = 'madansi/tests/data/assembly_4_sequences.fa'
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     sequences = {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800], 'Contig3': ['', '', 2000]}
     expected_ends = {
         'Contig1': {'Contig2': [980, 490]},
         'Contig2': {'Contig1': [2, 301], 'Contig3': [301, 2]},
         'Contig3': {'Contig2': [1, 250]},
     }

     edges = [
         ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'geneA'),
         ('geneA', 'geneB'), ('geneB', 'gene4'), ('gene4', 'gene5'),
         ('gene5', 'geneC'), ('geneC', 'gene6'), ('gene6', 'gene7'),
         ('gene7', 'gene8'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [
         [('Contig1', 'Contig2'), 1, ['geneA', 'geneB']],
         [('Contig2', 'Contig3'), 1, ['geneC']],
     ]

     detector = GeneDetector(assembly, blast_hits)
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)
     # The separating genes are present in the table but mapped to None.
     refiner.genes = {
         'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
         'gene4': 'Contig2', 'gene5': 'Contig2', 'gene6': 'Contig3',
         'gene7': 'Contig3', 'gene8': 'Contig3',
         'geneA': None, 'geneB': None, 'geneC': None,
     }

     refiner.ends_of_contigs()
     self.assertDictEqual(refiner.contig_ends, expected_ends)
 def test_finding_contig_ends_3_cycle_both_different_half(self):
     """ends_of_contigs() must return the expected ends for Contig2/Contig3 when geneA, gene8 and gene6 form a 3-cycle."""
     assembly = 'madansi/tests/data/assembly_4_sequences.fa'
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

     graph = nx.Graph()
     graph.add_edges_from([
         ('gene4', 'gene5'), ('gene5', 'geneA'), ('geneA', 'gene8'),
         ('geneA', 'gene6'), ('gene6', 'gene8'), ('gene6', 'gene7'),
     ])
     neighbours = [[('Contig2', 'Contig3'), 1, ['geneA']]]
     detector = GeneDetector(assembly, blast_hits)
     sequences = {'Contig2': ['', '', 800], 'Contig3': ['', '', 1200]}
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)

     ends = refiner.ends_of_contigs()
     expected_ends = {'Contig3': {'Contig2': [250, 1200]}, 'Contig2': {'Contig3': [301, 2]}}
     self.assertDictEqual(ends, expected_ends)
 def test_add_to_contig_appearances(self):
     """add_to_contig_appearance('gene1', ...) must raise Contig1's count by one and list gene1 under the given index."""
     assembly = 'madansi/tests/data/assembly_4_sequences.fa'
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

     graph = nx.Graph()
     detector = GeneDetector(assembly, blast_hits)
     neighbours = []
     sequences = {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800], 'Contig3': ['', '', 2000]}
     refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)

     # Starting from an empty appearances dict.
     result = refiner.add_to_contig_appearance('gene1', {}, 0)
     self.assertDictEqual(result, {'Contig1': [1, {0: ['gene1']}]})

     # Contig1 already present with a zero count and no genes.
     result = refiner.add_to_contig_appearance('gene1', {'Contig1': [0, {}]}, 1)
     self.assertDictEqual(result, {'Contig1': [1, {1: ['gene1']}]})

     # New index alongside an existing one.
     result = refiner.add_to_contig_appearance('gene1', {'Contig1': [1, {0: ['gene2']}]}, 1)
     self.assertDictEqual(result, {'Contig1': [2, {0: ['gene2'], 1: ['gene1']}]})

     # Index 2 already holds gene3; the expected list is ['gene1', 'gene3'].
     result = refiner.add_to_contig_appearance(
         'gene1', {'Contig1': [2, {1: ['gene2'], 2: ['gene3']}]}, 2)
     self.assertDictEqual(result, {'Contig1': [3, {1: ['gene2'], 2: ['gene1', 'gene3']}]})