def main(open_name_file, dir_path, l):
    """Split every genome into parts and print them to standard output.

    The taxonomy file is parsed into groups, the FASTA data for the
    genomes of those groups is loaded from ``dir_path``, and each genome
    is then split with ``genome.split_seq(l)``; the resulting parts are
    written to ``sys.stdout`` through ``print_parts``.

    Args:
        open_name_file: Handed unchanged to ``read_parsed_taxonomy_file``.
            Presumably an already opened parsed-taxonomy file -- confirm
            against the caller.
        dir_path: Handed unchanged to ``read_FASTA_files``; the location
            of the FASTA files for the genomes.
        l: Handed unchanged to ``genome.split_seq``. Presumably the
            length of each part -- TODO confirm against ``split_seq``.
    """
    # NOTE(review): the meaning of the argument 1 is not visible here;
    # check DNA.generate_kmer_hash before changing it.
    DNA.generate_kmer_hash(1)

    groups = read_parsed_taxonomy_file(open_name_file)

    # Read in the FASTA files for each genome
    read_FASTA_files(groups, dir_path)

    # A single generator is shared by all genomes so that the same
    # Uniq_id instance is handed to every print_parts call.
    id_generator = Uniq_id(1000)

    # For each bin, generate a number of contigs
    for group in groups:
        for genome in group.genomes:
            parts = genome.split_seq(l)
            print_parts(parts, sys.stdout, id_generator, genome)
def test_print_parts(self):
    """Check that print_parts output can be read back as FASTA records.

    Every genome from the ``parsed_gen_2_2_test.txt`` fixture is split
    with ``split_seq(10000)`` and written to a temporary file through
    ``print_parts``. The file is then parsed with ``SeqIO`` and the
    number of records and the id of the first record are verified.
    """
    cur_dir = os.path.dirname(__file__)
    parsed_file_name = os.path.join(cur_dir, "fixtures/parsed_gen_2_2_test.txt")

    # Use a context manager so the fixture file is closed even when
    # parsing raises; the original left the handle open.
    with open(parsed_file_name, 'r') as open_file:
        groups = read_parsed_taxonomy_file(open_file)

    dir_path = os.path.join(cur_dir, "fixtures/reference_genomes")
    read_FASTA_files(groups, dir_path)

    uniq_id = Uniq_id(10)
    with tempfile.NamedTemporaryFile() as tmp_file:
        for group in groups:
            for genome in group.genomes:
                parts = genome.split_seq(10000)
                print_parts(parts, tmp_file, uniq_id, genome)

        # Rewind so that the parser starts at the first written record.
        tmp_file.seek(0)
        genome_parts = list(SeqIO.parse(tmp_file, "fasta"))

        assert_equal(len(genome_parts), 1788)
        assert_equal(genome_parts[0].id, "Ehrlichia_canis_Jake_uid58071_10_0")