Exemplo n.º 1
0
def main(open_name_file, dir_path, l):
    """Split every genome of every group into parts and print them to stdout.

    Args:
        open_name_file: Passed to ``read_parsed_taxonomy_file`` to obtain the
            groups (presumably an open parsed-taxonomy file -- confirm).
        dir_path: Passed to ``read_FASTA_files`` together with the groups.
        l: Passed unchanged to ``genome.split_seq``; presumably the part
            length -- confirm against ``split_seq``.
    """
    DNA.generate_kmer_hash(1)

    groups = read_parsed_taxonomy_file(open_name_file)

    # Read in the FASTA files for each genome
    read_FASTA_files(groups, dir_path)

    # A single id generator is shared by every print_parts call.
    id_generator = Uniq_id(1000)
    for group in groups:
        for genome in group.genomes:
            parts = genome.split_seq(l)
            print_parts(parts, sys.stdout, id_generator, genome)
Exemplo n.º 2
0
 def test_print_parts(self):
     """Check the FASTA records written by ``print_parts`` for the 2_2 fixture.

     Every genome is split with ``split_seq(10000)`` and written to a
     temporary file, which is then parsed back with ``SeqIO`` to verify the
     record count and the id of the first record.
     """
     cur_dir = os.path.dirname(__file__)
     parsed_file_name = os.path.join(cur_dir, "fixtures/parsed_gen_2_2_test.txt")
     # Use a context manager so the fixture handle is closed even if
     # parsing raises; the original left the file open.
     with open(parsed_file_name, 'r') as open_file:
         groups = read_parsed_taxonomy_file(open_file)
     dir_path = os.path.join(cur_dir, "fixtures/reference_genomes")
     read_FASTA_files(groups, dir_path)
     uniq_id = Uniq_id(10)
     with tempfile.NamedTemporaryFile() as tmp_file:
         for group in groups:
             for genome in group.genomes:
                 parts = genome.split_seq(10000)
                 print_parts(parts, tmp_file, uniq_id, genome)
         # Rewind so the parser reads what was just written.
         tmp_file.seek(0)
         genome_parts = list(SeqIO.parse(tmp_file, "fasta"))
         assert_equal(len(genome_parts), 1788)
         assert_equal(genome_parts[0].id, "Ehrlichia_canis_Jake_uid58071_10_0")
 def test_generate_group_contigs(self):
     """Check contigs generated by ``SampleGroup.generate_group_contigs``.

     Uses the last group of the 2_2 fixture and asserts the length of the
     last contig's ``full_seq`` and the combined contig count of the first
     and last genome in the group.
     """
     cur_dir = os.path.dirname(__file__)
     parsed_file_name = os.path.join(cur_dir, "fixtures/parsed_gen_2_2_test.txt")
     # Use a context manager so the fixture handle is closed even if
     # parsing raises; the original left the file open.
     with open(parsed_file_name, 'r') as open_file:
         groups = read_parsed_taxonomy_file(open_file)
     dir_path = os.path.join(cur_dir, "fixtures/reference_genomes")
     read_FASTA_files(groups, dir_path)
     uniq_id = Uniq_id(10)
     group = groups[-1]
     # NOTE(review): positional meaning of the SampleSetting arguments is
     # not visible here -- confirm against its definition.
     s_set = SampleSetting("genomes", 20, 1100, 1100, True)
     sg = SampleGroup(s_set, group, uniq_id)
     sg.generate_group_contigs()

     # The contigs are checked both through sg.group and through the local
     # group reference.
     assert_equal(len(sg.group.genomes[-1].contigs[-1].full_seq), 1100)
     assert_equal(len(group.genomes[-1].contigs[-1].full_seq), 1100)
     assert_equal(len(group.genomes[-1].contigs) + len(group.genomes[0].contigs), 20)
Exemplo n.º 4
0
def main(open_name_file, dir_path, x_set, start_position=False):
    """Generate contigs for every group and print them to stdout.

    Args:
        open_name_file: Passed to ``read_parsed_taxonomy_file`` to obtain the
            groups (presumably an open parsed-taxonomy file -- confirm).
        dir_path: Passed to ``read_FASTA_files`` together with the groups.
        x_set: Settings object handed to each ``SampleGroup``.
        start_position: Forwarded as the ``start_position`` keyword to both
            ``generate_group_contigs`` and ``print_group_contigs``.
    """
    try:
        DNA.generate_kmer_hash(2)
    except Exception:
        # Deliberate best-effort, kept from the original. Catching Exception
        # instead of using a bare except lets KeyboardInterrupt and
        # SystemExit propagate.
        # NOTE(review): the specific failure tolerated here is not visible
        # in this file -- narrow the exception type once confirmed.
        pass

    groups = read_parsed_taxonomy_file(open_name_file)

    # Read in the FASTA files for each genome
    read_FASTA_files(groups, dir_path)

    # A single id generator is shared by every SampleGroup.
    id_generator = Uniq_id(1000)
    for group in groups:
        sg = SampleGroup(x_set, group, id_generator)
        sg.generate_group_contigs(start_position=start_position)
        sg.print_group_contigs(sys.stdout, start_position=start_position)
Exemplo n.º 5
0
 def test_print_group_contigs(self):
     """Check the FASTA output of ``SampleGroup.print_group_contigs``.

     Generates contigs for the last group of the 2_2 fixture, writes them to
     a temporary file, parses that file with ``SeqIO`` and asserts the record
     count plus the id and description of the first record.
     """
     cur_dir = os.path.dirname(__file__)
     parsed_file_name = os.path.join(cur_dir, "fixtures/parsed_gen_2_2_test.txt")
     # Use a context manager so the fixture handle is closed even if
     # parsing raises; the original left the file open.
     with open(parsed_file_name, 'r') as open_file:
         groups = read_parsed_taxonomy_file(open_file)
     dir_path = os.path.join(cur_dir, "fixtures/reference_genomes")
     read_FASTA_files(groups, dir_path)
     uniq_id = Uniq_id(10)
     group = groups[-1]
     # NOTE(review): positional meaning of the SampleSetting arguments is
     # not visible here -- confirm against its definition.
     s_set = SampleSetting("genomes", 10, 1100, 1100, True)
     sg = SampleGroup(s_set, group, uniq_id)
     sg.generate_group_contigs()

     with tempfile.NamedTemporaryFile() as tmp_file:
         sg.print_group_contigs(tmp_file)
         # Rewind so the parser reads what was just written.
         tmp_file.seek(0)
         contig_seqs = list(SeqIO.parse(tmp_file, "fasta"))
         assert_equal(len(contig_seqs), 10)
         assert_equal(contig_seqs[0].id, "Capnocytophaga_canimorsus_Cc5_uid70727_10")
         d_string = "Capnocytophaga_canimorsus_Cc5_uid70727_10 Flavobacteriaceae|Capnocytophaga|Capnocytophaga canimorsus"
         assert_equal(contig_seqs[0].description, d_string)
Exemplo n.º 6
0
 def test_read_single_FASTA_file(self):
     """Check ``read_FASTA_files`` with ``dir_structure='single_fasta_file'``.

     Asserts that the call returns ``None`` and then inspects the last
     genome of the last group: sequence length, id, and family/genus
     agreement with the first genome of the same group.
     """
     cur_dir = os.path.dirname(__file__)
     parsed_file_name = os.path.join(cur_dir, "fixtures/parsed_gen_0_0_mock_test_complete.txt")
     # Use a context manager so the fixture handle is closed even if
     # parsing raises; the original left the file open.
     with open(parsed_file_name, 'r') as open_file:
         groups = read_parsed_taxonomy_file(open_file)
     # Despite the variable name, this path points at a single FASTA file.
     dir_path = os.path.join(cur_dir, "fixtures/mock_references_test.fa")
     output = read_FASTA_files(groups, dir_path, dir_structure='single_fasta_file')
     assert_is_none(output)
     last_genome = groups[-1].genomes[-1]
     # NOTE(review): the original author annotated this assertion with
     # "wrong length" -- confirm the expected value.
     assert_equal(len(last_genome.full_seq), 2222430)
     assert_equal(last_genome.id, "Pyrobaculum_aerophilum_str._IM2")
     # Same family and genera within group
     assert_equal(groups[-1].genomes[-1].family, groups[-1].genomes[0].family)
     assert_equal(groups[-1].genomes[-1].genus, groups[-1].genomes[0].genus)
     # A correct family
     # NOTE(review): the original author annotated this assertion with
     # "wrong family" -- confirm the expected value.
     assert_equal(groups[-1].genomes[-1].family, "Thermoproteaceae")
Exemplo n.º 7
0
 def test_read_FASTA_files(self):
     """Check ``read_FASTA_files`` against the reference_genomes fixture.

     Asserts that the call returns ``None`` and then inspects the last
     genome of the last group: sequence length, id, and family/genus
     agreement with the first genome of the same group.
     """
     cur_dir = os.path.dirname(__file__)
     parsed_file_name = os.path.join(cur_dir, "fixtures/parsed_gen_2_2_test.txt")
     # Use a context manager so the fixture handle is closed even if
     # parsing raises; the original left the file open.
     with open(parsed_file_name, 'r') as open_file:
         groups = read_parsed_taxonomy_file(open_file)
     dir_path = os.path.join(cur_dir, "fixtures/reference_genomes")
     output = read_FASTA_files(groups, dir_path)
     assert_is_none(output)
     last_genome = groups[-1].genomes[-1]
     assert_equal(len(last_genome.full_seq), 2612925)
     assert_equal(last_genome.id, "Capnocytophaga_ochracea_DSM_7271_uid59197")
     # Same family and genera within group
     assert_equal(groups[-1].genomes[-1].family, groups[-1].genomes[0].family)
     assert_equal(groups[-1].genomes[-1].genus, groups[-1].genomes[0].genus)
     # A correct family
     assert_equal(groups[-1].genomes[-1].family, "Flavobacteriaceae")