Пример #1
0
    def run(self):
        """Split the input FASTA into fragments and write them with a default profile.

        Steps, in order:
          1. Run the barrnap command built by ``Barrnap`` and read its output
             back as a list of boundaries.
          2. Ask ``Fasta`` for the fragment coordinates implied by those
             boundaries and populate the fragments from the chromosome.
          3. Write the fragments into ``self.output_directory`` via
             ``FragmentFiles``.
          4. Write a default profile with ``ProfileGenerator``.

        Raises:
            subprocess.CalledProcessError: if the barrnap command exits with
                a non-zero status.
        """
        # Local import: only needed to release the descriptor from mkstemp.
        import os

        # todo: filter fasta to keep just the largest contig.

        # run the fasta through barrnap
        # mkstemp() returns an already-open OS-level descriptor. Only the path
        # is used below, so close the descriptor straight away to avoid
        # leaking it.
        fd, barrnap_outputfile = mkstemp()
        os.close(fd)
        self.files_to_cleanup.append(barrnap_outputfile)

        b = Barrnap(self.input_file, self.threads)
        # NOTE(review): the command is run through the shell; presumably
        # construct_barrnap_command() returns a single command string -
        # confirm that any file names in it are quoted.
        subprocess.check_output(
            b.construct_barrnap_command(barrnap_outputfile), shell=True)

        boundries = b.read_barrnap_output(barrnap_outputfile)

        f = Fasta(self.input_file)
        fragments = f.calc_fragment_coords(boundries)
        f.populate_fragments_from_chromosome(fragments,
                                             self.max_bases_from_ends)

        ff = FragmentFiles(fragments,
                           self.output_directory,
                           fragment_order=self.fragment_order)
        ff.create_fragment_fastas()

        # create a default profile.txt file
        default_profile = ProfileGenerator(self.output_directory,
                                           len(ff.ordered_fragments),
                                           self.dnaa_fasta, self.threads)
        default_profile.write_output_file()
Пример #2
0
    def test_calc_fragment_coords_gz(self):
        """Check fragment coordinates computed from the '.fa.gz' fixture."""
        fixture = os.path.join(data_dir, 'calc_fragment_coords.fa.gz')
        fasta = Fasta(fixture, False)
        fragments = fasta.calc_fragment_coords(
            [[45, 55], [90, 110], [150, 180]])

        observed = []
        for fragment in fragments:
            observed.append(fragment.coords)

        # The first expected fragment is made of two coordinate pairs,
        # [180, 200] followed by [0, 45].
        expected = [[[180, 200], [0, 45]], [[55, 90]], [[110, 150]]]
        self.assertEqual(observed, expected)
Пример #3
0
    def test_chop_from_ends(self):
        """Check the fragment sequences produced when 5 is passed as the limit.

        The second argument of ``populate_fragments_from_chromosome`` is 5
        here; each resulting sequence is expected to be five bases, 'NNN',
        then five more bases.
        """
        f = Fasta(os.path.join(data_dir, 'calc_fragment_coords.fa'), False)
        fragments = f.calc_fragment_coords([[45, 55], [90, 110], [150, 180]])

        # The pre-population sequence list was computed and then overwritten
        # without being used, so it is no longer built here.
        f.populate_fragments_from_chromosome(fragments, 5)
        sequences = [str(fragment.sequence) for fragment in fragments]
        self.assertEqual(sequences,
                         ['TTTTTNNNAAAAA', 'CCCCCNNNCCCCC', 'GGGGGNNNGGGGG'])
Пример #4
0
    def test_calc_fragment_coords(self):
        """Check fragment coordinates computed from the plain FASTA fixture."""
        fixture = os.path.join(data_dir, 'calc_fragment_coords.fa')
        fasta = Fasta(fixture, False)
        fragments = fasta.calc_fragment_coords(
            [[45, 55], [90, 110], [150, 180]])

        observed = []
        for fragment in fragments:
            observed.append(fragment.coords)

        expected = [[[180, 200], [0, 45]], [[55, 90]], [[110, 150]]]
        self.assertEqual(observed, expected)

        # Nothing is asserted after this call; it only has to complete
        # without raising.
        fasta.populate_fragments_from_chromosome(fragments, None)
Пример #5
0
    def run_analysis(self, input_file, p, d):
        """Fragment ``input_file``, blast each fragment and return its type string.

        Args:
            input_file: FASTA path handed to ``Barrnap`` and ``Fasta``.
            p: passed unchanged to ``TypeGenerator`` together with the
                profile built here.
            d: object whose ``db_prefix`` attribute is given to ``Blast``.

        Returns:
            ``tg.calculate_type()``, a tab, and ``str(gat_profile)``
            concatenated into one string.

        Side effects:
            * A temporary directory is created and registered in
              ``self.dirs_to_cleanup``.
            * Fragments with no top blast result are appended to the file
              ``self.new_fragments``.
            * Every top result found is appended to ``self.top_results``.
            * When ``tg.has_previously_seen`` is falsy, a line is appended
              to the file ``self.novel_profiles``.

        Raises:
            subprocess.CalledProcessError: if the barrnap command exits with
                a non-zero status.
        """
        # Local import: only needed to release the descriptor from mkstemp.
        import os

        # run the fasta through barrnap
        # mkstemp() returns an already-open OS-level descriptor. Only the path
        # is used below, so close it immediately to avoid leaking one
        # descriptor per analysed file.
        fd, barrnap_outputfile = mkstemp()
        os.close(fd)
        # NOTE(review): the temporary barrnap output file itself is not
        # removed or registered for cleanup in this method - confirm whether
        # the caller takes care of it.
        b = Barrnap(input_file, self.threads)
        subprocess.check_output(
            b.construct_barrnap_command(barrnap_outputfile),
            shell=True)

        boundries = b.read_barrnap_output(barrnap_outputfile)

        f = Fasta(input_file, is_circular=self.is_circular)
        fragments = f.calc_fragment_coords(boundries)
        f.populate_fragments_from_chromosome(fragments,
                                             self.max_bases_from_ends)

        tmpdir = mkdtemp()
        self.dirs_to_cleanup.append(tmpdir)

        ff = FragmentFiles(fragments, tmpdir)
        ff.create_fragment_fastas()

        # take each fasta file and blast it against the database
        blast = Blast(d.db_prefix, self.threads)

        gat_profile = GATProfile(fragments=[])
        for fasta_file in ff.output_filenames:
            blast_results = blast.run_blast(fasta_file)
            fb = FilterBlast(blast_results, self.min_bit_score,
                             self.min_alignment_length)
            top_result = fb.return_top_result()

            if top_result is None:
                # No usable hit: record a placeholder in the profile and keep
                # the fragment's FASTA content in the new-fragments file.
                gat_profile.fragments.append('?')
                with open(fasta_file, "r") as fasta_file_fh, \
                        open(self.new_fragments, "a+") as newfrag_fh:
                    newfrag_fh.write(fasta_file_fh.read())
                continue

            self.top_results.append(top_result)

            # Non-forward hits are marked with a trailing apostrophe.
            if top_result.is_forward():
                gat_profile.fragments.append(str(top_result.subject))
            else:
                gat_profile.fragments.append(str(top_result.subject) + '\'')

        gat_profile.orientate_for_dnaA()
        # lookup the gat_profile to get the number
        tg = TypeGenerator(p, gat_profile)
        type_output_string = tg.calculate_type() + "\t" + str(gat_profile)
        if not tg.has_previously_seen:
            with open(self.novel_profiles, "a+") as output_fh:
                output_fh.write(self.db_dir + "\t" + type_output_string + "\n")

        return type_output_string
Пример #6
0
    def test_populate_fragments_from_chromosome(self):
        """Check fragment sequences before and after population with None."""
        fixture = os.path.join(data_dir, 'calc_fragment_coords.fa')
        fasta = Fasta(fixture, False)
        fragments = fasta.calc_fragment_coords(
            [[45, 55], [90, 110], [150, 180]])

        # Before population every fragment's sequence stringifies to "".
        before = []
        for fragment in fragments:
            before.append(str(fragment.sequence))
        self.assertEqual(before, ["", "", ""])

        fasta.populate_fragments_from_chromosome(fragments, None)

        after = []
        for fragment in fragments:
            after.append(str(fragment.sequence))
        expected = [
            "TTTTTTTTTTTTTTTTTTTTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
            "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC",
            "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
        ]
        self.assertEqual(after, expected)
Пример #7
0
    def test_chop_from_ends(self):
        """Check the fragment sequences produced when 5 is passed as the limit.

        Boundaries are given as ``Operon`` objects. The second argument of
        ``populate_fragments_from_chromosome`` is 5 here; each resulting
        sequence is expected to be five bases, 'NNN', then five more bases.
        """
        f = Fasta(os.path.join(data_dir, 'calc_fragment_coords.fa'), False)
        boundries = [
            Operon(45, 55, True),
            Operon(90, 110, False),
            Operon(150, 180, True)
        ]
        fragments = f.calc_fragment_coords(boundries)

        # The pre-population sequence list was computed and then overwritten
        # without being used, so it is no longer built here.
        f.populate_fragments_from_chromosome(fragments, 5)
        sequences = [str(fragment.sequence) for fragment in fragments]
        self.assertEqual(sequences,
                         ['TTTTTNNNAAAAA', 'CCCCCNNNCCCCC', 'GGGGGNNNGGGGG'])
Пример #8
0
    def test_populate_fragments_from_chromosome(self):
        """Check fragment sequences before and after population with None.

        Boundaries are supplied as ``Operon`` objects.
        """
        fixture = os.path.join(data_dir, 'calc_fragment_coords.fa')
        fasta = Fasta(fixture, False)
        operons = [
            Operon(45, 55, True),
            Operon(90, 110, False),
            Operon(150, 180, True),
        ]
        fragments = fasta.calc_fragment_coords(operons)

        # Before population every fragment's sequence stringifies to "".
        before = []
        for fragment in fragments:
            before.append(str(fragment.sequence))
        self.assertEqual(before, ["", "", ""])

        fasta.populate_fragments_from_chromosome(fragments, None)

        after = []
        for fragment in fragments:
            after.append(str(fragment.sequence))
        expected = [
            "TTTTTTTTTTTTTTTTTTTTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
            "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC",
            "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
        ]
        self.assertEqual(after, expected)
Пример #9
0
 def populate_fragments_from_chromosome(self, input_file, boundries):
     """Build and populate the fragments of ``input_file`` for ``boundries``.

     A ``Fasta`` is created from ``input_file`` using this object's
     ``verbose`` and ``is_circular`` settings. It computes the fragment
     coordinates for ``boundries`` and then populates the fragments,
     passing ``self.max_bases_from_ends`` along. The populated fragments
     are returned.
     """
     fasta = Fasta(input_file, self.verbose, is_circular=self.is_circular)
     populated = fasta.calc_fragment_coords(boundries)
     fasta.populate_fragments_from_chromosome(
         populated, self.max_bases_from_ends)
     return populated