def run(self):
    """Split the input FASTA into fragment files and write a default profile.

    Steps, in order:
      1. run the command built by ``Barrnap.construct_barrnap_command`` so
         that its output lands in a temporary file;
      2. read the boundaries back with ``Barrnap.read_barrnap_output``;
      3. let ``Fasta`` compute fragment coordinates from those boundaries
         and populate them (limited by ``self.max_bases_from_ends``);
      4. write the fragments through ``FragmentFiles`` into
         ``self.output_directory``;
      5. write the profile file through ``ProfileGenerator``.

    Raises:
        subprocess.CalledProcessError: if the barrnap command exits with a
            non-zero status.
    """
    # Function-scope import so this method does not rely on the module
    # already importing ``os``.
    import os

    # todo: filter fasta to keep just the largest contig.

    # run the fasta through barrnap
    fd, barrnap_outputfile = mkstemp()
    # mkstemp() returns an *open* OS-level descriptor. Only the path is used
    # below, so close it straight away instead of leaking one fd per call.
    os.close(fd)
    self.files_to_cleanup.append(barrnap_outputfile)

    b = Barrnap(self.input_file, self.threads)
    # NOTE(review): the command is a shell string assembled by Barrnap and
    # executed with shell=True - confirm the input path is quoted there.
    subprocess.check_output(
        b.construct_barrnap_command(barrnap_outputfile), shell=True)
    boundries = b.read_barrnap_output(barrnap_outputfile)

    f = Fasta(self.input_file)
    fragments = f.calc_fragment_coords(boundries)
    f.populate_fragments_from_chromosome(fragments, self.max_bases_from_ends)

    ff = FragmentFiles(fragments, self.output_directory,
                       fragment_order=self.fragment_order)
    ff.create_fragment_fastas()

    # create a default profile.txt file
    default_profile = ProfileGenerator(self.output_directory,
                                       len(ff.ordered_fragments),
                                       self.dnaa_fasta,
                                       self.threads)
    default_profile.write_output_file()
def test_orphan_5s(self):
    # Parse the 'orphan_5S.txt' fixture with an explicit chromosome length
    # and compare the parser's return value with the expected coordinates.
    parser = Barrnap('xxx', 1, False, chromosome_length=2500000)
    fixture = os.path.join(data_dir, 'orphan_5S.txt')
    expected = [
        [804629, 809557],
        [809807, 814769],
        [854058, 859048],
        [1947089, 1952017],
        [2063399, 2068327],
        [2241391, 2246490],
    ]
    self.assertEqual(parser.read_barrnap_output(fixture), expected)
def test_barrnap_normal(self):
    # Parse the 'barrnap_output.txt' fixture and compare the parser's
    # return value with the expected coordinate pairs.
    parser = Barrnap('xxx', 1, False)
    fixture = os.path.join(data_dir, 'barrnap_output.txt')
    expected = [
        [36740, 42014],
        [185730, 190843],
        [698430, 703543],
        [1535716, 1540829],
        [3935714, 3940989],
        [4651666, 4657321],
    ]
    self.assertEqual(parser.read_barrnap_output(fixture), expected)
def test_repeated_5s(self):
    # Parse the 'repeated_5S.txt' fixture and check start/end/direction of
    # every boundary object the parser returns.
    parser = Barrnap(os.path.join(data_dir, 'test.fa'), 1, False)
    parsed = parser.read_barrnap_output(
        os.path.join(data_dir, 'repeated_5S.txt'))
    # Flatten each boundary object into a plain list so it compares with ==.
    observed = [[item.start, item.end, item.direction] for item in parsed]
    self.assertEqual(observed, [[1254, 6909, True]])
def test_barrnap_70s_over_end(self):
    # Parse the '70s_over_end.txt' fixture and check start/end/direction of
    # every boundary object. The second expected entry has start > end.
    parser = Barrnap(os.path.join(data_dir, 'test.fa'), 1, False)
    parsed = parser.read_barrnap_output(
        os.path.join(data_dir, '70s_over_end.txt'))
    # Flatten each boundary object into a plain list so it compares with ==.
    observed = [[item.start, item.end, item.direction] for item in parsed]
    expected = [
        [2805, 5721, True],
        [9200, 1889, True],
    ]
    self.assertEqual(observed, expected)
def test_orphan_5s(self):
    # Parse the 'orphan_5S.txt' fixture with an explicit chromosome length
    # and check start/end/direction of every boundary object returned.
    parser = Barrnap('xxx', 1, False, chromosome_length=2500000)
    parsed = parser.read_barrnap_output(
        os.path.join(data_dir, 'orphan_5S.txt'))
    # Flatten each boundary object into a plain list so it compares with ==.
    observed = [[item.start, item.end, item.direction] for item in parsed]
    expected = [
        [804629, 809557, False],
        [809807, 814769, False],
        [854058, 859048, False],
        [1947089, 1952017, True],
        [2063399, 2068327, True],
        [2241391, 2246490, True],
    ]
    self.assertEqual(observed, expected)
def test_barrnap_normal(self):
    # Parse the 'barrnap_output.txt' fixture and check start/end/direction
    # of every boundary object the parser returns.
    parser = Barrnap('xxx', 1, False)
    parsed = parser.read_barrnap_output(
        os.path.join(data_dir, 'barrnap_output.txt'))
    # Flatten each boundary object into a plain list so it compares with ==.
    observed = [[item.start, item.end, item.direction] for item in parsed]
    expected = [
        [36740, 42014, False],
        [185730, 190843, False],
        [698430, 703543, True],
        [1535716, 1540829, True],
        [3935714, 3940989, False],
        [4651666, 4657321, False],
    ]
    self.assertEqual(observed, expected)
def run_analysis(self, input_file, p, d):
    """Build a fragment profile for ``input_file`` and return its type line.

    The FASTA is run through the barrnap command built by ``Barrnap``, the
    resulting boundaries are turned into fragments by ``Fasta``, each
    fragment is written to a temporary directory and searched with
    ``Blast``. The best hit per fragment (as chosen by ``FilterBlast``)
    contributes one entry to a ``GATProfile``.

    Args:
        input_file: path of the FASTA file to analyse.
        p: passed unchanged to ``TypeGenerator`` together with the profile.
        d: object whose ``db_prefix`` attribute is handed to ``Blast``.

    Returns:
        str: ``tg.calculate_type()``, a tab, then ``str(gat_profile)``.

    Side effects:
        * appends the temporary fragment directory to
          ``self.dirs_to_cleanup``;
        * appends every non-``None`` top hit to ``self.top_results``;
        * appends the FASTA text of fragments without a hit to the file
          ``self.new_fragments``;
        * appends a line to the file ``self.novel_profiles`` when
          ``tg.has_previously_seen`` is false.

    Raises:
        subprocess.CalledProcessError: if the barrnap command exits with a
            non-zero status.
    """
    # Function-scope import so this method does not rely on the module
    # already importing ``os``.
    import os

    # run the fasta through barrnap
    fd, barrnap_outputfile = mkstemp()
    # mkstemp() returns an *open* OS-level descriptor. Only the path is used
    # below, so close it straight away instead of leaking one fd per call.
    os.close(fd)
    # NOTE(review): barrnap_outputfile itself is never deleted or registered
    # for cleanup in this method - confirm whether that is intended.

    b = Barrnap(input_file, self.threads)
    subprocess.check_output(
        b.construct_barrnap_command(barrnap_outputfile), shell=True)
    boundries = b.read_barrnap_output(barrnap_outputfile)

    f = Fasta(input_file, is_circular=self.is_circular)
    fragments = f.calc_fragment_coords(boundries)
    f.populate_fragments_from_chromosome(fragments, self.max_bases_from_ends)

    tmpdir = mkdtemp()
    self.dirs_to_cleanup.append(tmpdir)
    ff = FragmentFiles(fragments, tmpdir)
    ff.create_fragment_fastas()

    # take each fasta file and blast it against the database
    blast = Blast(d.db_prefix, self.threads)
    gat_profile = GATProfile(fragments=[])
    for fasta_file in ff.output_filenames:
        blast_results = blast.run_blast(fasta_file)
        fb = FilterBlast(blast_results, self.min_bit_score,
                         self.min_alignment_length)
        top_result = fb.return_top_result()

        if top_result is None:
            # No acceptable hit: record a placeholder in the profile and
            # append the fragment's FASTA to the new-fragments file.
            gat_profile.fragments.append('?')
            with open(fasta_file, "r") as fasta_file_fh, \
                    open(self.new_fragments, "a+") as newfrag_fh:
                newfrag_fh.write(fasta_file_fh.read())
            continue

        self.top_results.append(top_result)
        if top_result.is_forward():
            gat_profile.fragments.append(str(top_result.subject))
        else:
            # Hits that are not forward get a trailing apostrophe.
            gat_profile.fragments.append(str(top_result.subject) + "'")

    gat_profile.orientate_for_dnaA()

    # lookup the gat_profile to get the number
    tg = TypeGenerator(p, gat_profile)
    type_output_string = tg.calculate_type() + "\t" + str(gat_profile)

    if not tg.has_previously_seen:
        with open(self.novel_profiles, "a+") as output_fh:
            output_fh.write(self.db_dir + "\t" + type_output_string + "\n")

    return type_output_string
def find_rrna_boundries(self, input_file):
    """Run barrnap on ``input_file`` and return the parsed boundaries.

    Args:
        input_file: path handed to ``Barrnap`` as the file to analyse.

    Returns:
        Whatever ``Barrnap.read_barrnap_output`` returns for the temporary
        output file written by the barrnap command.

    Raises:
        subprocess.CalledProcessError: if the barrnap command exits with a
            non-zero status.
    """
    # run the fasta through barrnap
    fd, barrnap_outputfile = mkstemp()
    # Only the path is needed. Closing here (rather than at the end) means
    # the descriptor is released even if the command or the parser raises.
    os.close(fd)
    # NOTE(review): barrnap_outputfile itself is never deleted or registered
    # for cleanup in this method - confirm whether that is intended.

    b = Barrnap(input_file, self.threads, self.verbose)
    cmd = b.construct_barrnap_command(barrnap_outputfile)

    if self.verbose:
        print("Finding rRNA boundries:\t" + cmd)
    subprocess.check_output(cmd, shell=True)

    boundries = b.read_barrnap_output(barrnap_outputfile)
    if self.verbose:
        # The parser result is not a string; concatenating it directly
        # raised TypeError in verbose mode, so convert it explicitly.
        print("Boundries:\t" + str(boundries))

    return boundries
def test_repeated_5s(self):
    # Parse the 'repeated_5S.txt' fixture and compare the parser's return
    # value with the single expected coordinate pair.
    parser = Barrnap(os.path.join(data_dir, 'test.fa'), 1, False)
    fixture = os.path.join(data_dir, 'repeated_5S.txt')
    expected = [[1254, 6909]]
    self.assertEqual(parser.read_barrnap_output(fixture), expected)
def test_barrnap_70s_over_end(self):
    # Parse the '70s_over_end.txt' fixture and compare the parser's return
    # value with the expected pairs. The second pair has start > end.
    parser = Barrnap(os.path.join(data_dir, 'test.fa'), 1, False)
    fixture = os.path.join(data_dir, '70s_over_end.txt')
    expected = [
        [2805, 5721],
        [9200, 1889],
    ]
    self.assertEqual(parser.read_barrnap_output(fixture), expected)