Пример #1
0
    def run(self):
        """Fragment the input FASTA at its barrnap boundaries and write a profile.

        Runs barrnap on ``self.input_file``, reads the reported boundaries,
        uses them to compute and populate fragments, writes the fragments
        out through ``FragmentFiles`` into ``self.output_directory`` and
        finally writes a default profile via ``ProfileGenerator``.

        Raises:
            subprocess.CalledProcessError: if the barrnap command exits with
                a non-zero status.
        """
        # Function-scope import so the descriptor fix below does not depend
        # on the module's import block.
        import os

        # todo: filter fasta to keep just the largest contig.

        # run the fasta through barrnap
        fd, barrnap_outputfile = mkstemp()
        # mkstemp() hands back an already-open OS-level descriptor. Only the
        # path is passed on, so close the descriptor now instead of leaking it.
        os.close(fd)
        self.files_to_cleanup.append(barrnap_outputfile)
        b = Barrnap(self.input_file, self.threads)
        subprocess.check_output(
            b.construct_barrnap_command(barrnap_outputfile), shell=True)

        boundries = b.read_barrnap_output(barrnap_outputfile)

        f = Fasta(self.input_file)
        fragments = f.calc_fragment_coords(boundries)
        f.populate_fragments_from_chromosome(fragments,
                                             self.max_bases_from_ends)

        ff = FragmentFiles(fragments,
                           self.output_directory,
                           fragment_order=self.fragment_order)
        ff.create_fragment_fastas()

        # create a default profile.txt file
        default_profile = ProfileGenerator(self.output_directory,
                                           len(ff.ordered_fragments),
                                           self.dnaa_fasta, self.threads)
        default_profile.write_output_file()
Пример #2
0
 def test_orphan_5s(self):
     """Boundaries read from orphan_5S.txt with chromosome_length=2500000."""
     parser = Barrnap('xxx', 1, False, chromosome_length=2500000)
     fixture = os.path.join(data_dir, 'orphan_5S.txt')
     expected = [
         [804629, 809557],
         [809807, 814769],
         [854058, 859048],
         [1947089, 1952017],
         [2063399, 2068327],
         [2241391, 2246490],
     ]
     self.assertEqual(parser.read_barrnap_output(fixture), expected)
Пример #3
0
 def test_barrnap_normal(self):
     """Boundaries read from the barrnap_output.txt fixture."""
     parser = Barrnap('xxx', 1, False)
     fixture = os.path.join(data_dir, 'barrnap_output.txt')
     expected = [
         [36740, 42014],
         [185730, 190843],
         [698430, 703543],
         [1535716, 1540829],
         [3935714, 3940989],
         [4651666, 4657321],
     ]
     self.assertEqual(parser.read_barrnap_output(fixture), expected)
Пример #4
0
 def test_repeated_5s(self):
     """The repeated_5S.txt fixture yields a single start/end/direction triple."""
     parser = Barrnap(os.path.join(data_dir, 'test.fa'), 1, False)
     fixture = os.path.join(data_dir, 'repeated_5S.txt')
     # Flatten each returned object to [start, end, direction] for comparison.
     triples = [[hit.start, hit.end, hit.direction]
                for hit in parser.read_barrnap_output(fixture)]
     self.assertEqual(triples, [[1254, 6909, True]])
Пример #5
0
 def test_barrnap_70s_over_end(self):
     """Start/end/direction triples read from the 70s_over_end.txt fixture."""
     parser = Barrnap(os.path.join(data_dir, 'test.fa'), 1, False)
     fixture = os.path.join(data_dir, '70s_over_end.txt')
     # Flatten each returned object to [start, end, direction] for comparison.
     triples = [[hit.start, hit.end, hit.direction]
                for hit in parser.read_barrnap_output(fixture)]
     expected = [[2805, 5721, True], [9200, 1889, True]]
     self.assertEqual(triples, expected)
Пример #6
0
 def test_orphan_5s(self):
     """Triples read from orphan_5S.txt with chromosome_length=2500000."""
     parser = Barrnap('xxx', 1, False, chromosome_length=2500000)
     fixture = os.path.join(data_dir, 'orphan_5S.txt')
     # Flatten each returned object to [start, end, direction] for comparison.
     triples = [[hit.start, hit.end, hit.direction]
                for hit in parser.read_barrnap_output(fixture)]
     expected = [
         [804629, 809557, False],
         [809807, 814769, False],
         [854058, 859048, False],
         [1947089, 1952017, True],
         [2063399, 2068327, True],
         [2241391, 2246490, True],
     ]
     self.assertEqual(triples, expected)
Пример #7
0
 def test_barrnap_normal(self):
     """Start/end/direction triples read from the barrnap_output.txt fixture."""
     parser = Barrnap('xxx', 1, False)
     fixture = os.path.join(data_dir, 'barrnap_output.txt')
     # Flatten each returned object to [start, end, direction] for comparison.
     triples = [[hit.start, hit.end, hit.direction]
                for hit in parser.read_barrnap_output(fixture)]
     expected = [
         [36740, 42014, False],
         [185730, 190843, False],
         [698430, 703543, True],
         [1535716, 1540829, True],
         [3935714, 3940989, False],
         [4651666, 4657321, False],
     ]
     self.assertEqual(triples, expected)
Пример #8
0
    def run_analysis(self, input_file, p, d):
        """Build a GAT profile for one FASTA file and return its type string.

        The input is run through barrnap, split into fragments at the
        reported boundaries, and each fragment FASTA is blasted against the
        database named by ``d.db_prefix``. The top filtered hit for every
        fragment contributes one entry to the profile; a fragment with no
        hit contributes ``'?'`` and its sequence is appended to
        ``self.new_fragments``.

        Args:
            input_file: path of the FASTA file to analyse.
            p: passed unchanged to ``TypeGenerator`` together with the profile.
            d: object whose ``db_prefix`` attribute is handed to ``Blast``.

        Returns:
            ``tg.calculate_type()`` and ``str(gat_profile)`` joined by a tab.

        Raises:
            subprocess.CalledProcessError: if the barrnap command exits with
                a non-zero status.
        """
        # Function-scope import so the descriptor fix below does not depend
        # on the module's import block.
        import os

        # run the fasta through barrnap
        fd, barrnap_outputfile = mkstemp()
        # mkstemp() hands back an already-open OS-level descriptor. Only the
        # path is passed on, so close the descriptor now instead of leaking it.
        os.close(fd)
        b = Barrnap(input_file, self.threads)
        subprocess.check_output(
            b.construct_barrnap_command(barrnap_outputfile),
            shell=True)

        boundries = b.read_barrnap_output(barrnap_outputfile)

        f = Fasta(input_file, is_circular=self.is_circular)
        fragments = f.calc_fragment_coords(boundries)
        f.populate_fragments_from_chromosome(fragments,
                                             self.max_bases_from_ends)

        tmpdir = mkdtemp()
        self.dirs_to_cleanup.append(tmpdir)

        ff = FragmentFiles(fragments, tmpdir)
        ff.create_fragment_fastas()

        # take each fasta file and blast it against the database
        blast = Blast(d.db_prefix, self.threads)

        gat_profile = GATProfile(fragments=[])
        for fasta_file in ff.output_filenames:
            blast_results = blast.run_blast(fasta_file)
            fb = FilterBlast(blast_results, self.min_bit_score,
                             self.min_alignment_length)
            top_result = fb.return_top_result()
            if top_result is None:
                # No acceptable hit: mark the slot as unknown and keep the
                # fragment's sequence by appending it to the new-fragments file.
                gat_profile.fragments.append('?')
                with open(fasta_file, "r") as fasta_file_fh, \
                        open(self.new_fragments, "a+") as newfrag_fh:
                    newfrag_fh.write(fasta_file_fh.read())
                continue

            self.top_results.append(top_result)

            # Hits that are not forward get a trailing apostrophe.
            if top_result.is_forward():
                gat_profile.fragments.append(str(top_result.subject))
            else:
                gat_profile.fragments.append(str(top_result.subject) + '\'')

        gat_profile.orientate_for_dnaA()
        # lookup the gat_profile to get the number
        tg = TypeGenerator(p, gat_profile)
        type_output_string = tg.calculate_type() + "\t" + str(gat_profile)
        if not tg.has_previously_seen:
            # Record profiles flagged as not previously seen.
            with open(self.novel_profiles, "a+") as output_fh:
                output_fh.write(self.db_dir + "\t" + type_output_string + "\n")

        return type_output_string
Пример #9
0
    def find_rrna_boundries(self, input_file):
        """Run barrnap on ``input_file`` and return the boundaries it reports.

        Args:
            input_file: path of the FASTA file handed to ``Barrnap``.

        Returns:
            Whatever ``Barrnap.read_barrnap_output`` returns for the barrnap
            output file.

        Raises:
            subprocess.CalledProcessError: if the barrnap command exits with
                a non-zero status.
        """
        # run the fasta through barrnap
        fd, barrnap_outputfile = mkstemp()
        # Only the path is used below; closing the descriptor up front means
        # it is not leaked when the barrnap command fails.
        os.close(fd)
        b = Barrnap(input_file, self.threads, self.verbose)
        cmd = b.construct_barrnap_command(barrnap_outputfile)
        if self.verbose:
            print("Finding rRNA boundries:\t" + cmd)
        subprocess.check_output(cmd, shell=True)

        boundries = b.read_barrnap_output(barrnap_outputfile)
        if self.verbose:
            # The result is not a str, so convert explicitly; plain
            # concatenation raised TypeError in verbose mode.
            print("Boundries:\t" + str(boundries))
        return boundries
Пример #10
0
 def test_repeated_5s(self):
     """The repeated_5S.txt fixture yields a single start/end pair."""
     parser = Barrnap(os.path.join(data_dir, 'test.fa'), 1, False)
     fixture = os.path.join(data_dir, 'repeated_5S.txt')
     expected = [[1254, 6909]]
     self.assertEqual(parser.read_barrnap_output(fixture), expected)
Пример #11
0
 def test_barrnap_70s_over_end(self):
     """Start/end pairs read from the 70s_over_end.txt fixture."""
     parser = Barrnap(os.path.join(data_dir, 'test.fa'), 1, False)
     fixture = os.path.join(data_dir, '70s_over_end.txt')
     expected = [[2805, 5721], [9200, 1889]]
     self.assertEqual(parser.read_barrnap_output(fixture), expected)