def find_order_orientationless(self, orientationless_fragment):
    """Return the order of the first database profile matching the given profile.

    Every profile in ``self.profile_db.gats`` is rebuilt as a ``GATProfile``
    from its ``orientationless_fragments()`` and compared against
    *orientationless_fragment* with ``does_the_profile_match``.

    Returns the matching database profile's ``order()``, or 0 when no
    database profile matches.
    """
    for candidate in self.profile_db.gats:
        stripped_candidate = GATProfile(
            self.verbose, fragments=candidate.orientationless_fragments())
        if not stripped_candidate.does_the_profile_match(
                orientationless_fragment):
            continue
        # First hit wins: report the order of the original database profile.
        return candidate.order()
    return 0
def run_analysis(self, input_file, p, d):
    """Type one input file and return its type string plus fragment profile.

    Steps, in order:
      1. find the rRNA boundaries in *input_file* and cut it into fragments;
      2. write each fragment to a FASTA file in a fresh temporary directory
         (registered in ``self.dirs_to_cleanup``);
      3. BLAST each fragment against the database at ``d.db_prefix`` and
         record the filtered top hit in a ``GATProfile``; fragments with no
         hit are recorded as ``'?'`` and their FASTA content is appended to
         ``self.new_fragments``;
      4. orientate the profile, plot it, and compute the type.

    *p* supplies ``dnaA_fragment_number`` and is handed to ``TypeGenerator``.

    Returns the string ``<type>\\t<profile>``, after passing it to
    ``self.write_novel_profile_to_file``.
    """
    rrna_boundries = self.find_rrna_boundries(input_file)
    chromosome_fragments = self.populate_fragments_from_chromosome(
        input_file, rrna_boundries)

    workdir = mkdtemp()
    self.dirs_to_cleanup.append(workdir)
    fragment_files = FragmentFiles(chromosome_fragments, workdir, self.verbose)
    fragment_files.create_fragment_fastas()

    # take each fasta file and blast it against the database
    blaster = Blast(d.db_prefix, self.threads, self.verbose)
    gat_profile = GATProfile(self.verbose, fragments=[])
    for frag in fragment_files.ordered_fragments:
        fasta_path = frag.output_filename
        hits = blaster.run_blast(fasta_path)
        hit_filter = FilterBlast(hits, self.min_bit_score,
                                 self.min_alignment_length, self.verbose)
        best_hit = hit_filter.return_top_result()

        if best_hit is None:
            # No acceptable hit: mark the fragment unknown and keep its
            # sequence so it can be inspected as a potential new fragment.
            gat_profile.fragments.append('?')
            frag.number = '?'
            with open(fasta_path, "r") as unknown_fh, \
                    open(self.new_fragments, "a+") as collected_fh:
                collected_fh.write(unknown_fh.read())
            continue

        self.top_results.append(best_hit)
        frag.number = str(best_hit.subject)
        if str(p.dnaA_fragment_number) == str(frag.number):
            frag.dna_A = True

        if best_hit.is_forward():
            gat_profile.fragments.append(str(best_hit.subject))
        else:
            # Reverse hits are flagged on the fragment and primed in the profile.
            frag.reversed_frag = True
            gat_profile.fragments.append(
                str(best_hit.subject) + '\'')

    gat_profile.orientate_for_dnaA()

    plot_fragments = \
        gat_profile.reorder_fragment_objects_based_on_fragment_name_array(
            fragment_files.ordered_fragments)
    PlotProfile(plot_fragments, self.output_plot_file,
                self.verbose).create_plot()

    # lookup the gat_profile to get the number
    type_gen = TypeGenerator(p, gat_profile, self.verbose)
    type_output_string = type_gen.calculate_type() + "\t" + str(gat_profile)
    self.write_novel_profile_to_file(type_gen, type_output_string)
    return type_output_string
def calculate_orientationless_order(self):
    """Return the order of the first database profile matching ``self.gat_profile``.

    Both sides are compared through ``GATProfile`` objects built from
    ``orientationless_fragments()``. Returns the matching database profile's
    ``order()``, or 0 when nothing in ``self.profile_db.gats`` matches.
    """
    query = GATProfile(
        fragments=self.gat_profile.orientationless_fragments())

    for known in self.profile_db.gats:
        known_stripped = GATProfile(
            fragments=known.orientationless_fragments())
        if not known_stripped.does_the_profile_match(query):
            continue
        return known.order()

    return 0
def run_analysis(self, input_file, p, d):
    """Type one input file and return its type string plus fragment profile.

    Steps, in order:
      1. run barrnap on *input_file* and read the boundaries it reports;
      2. compute fragment coordinates and populate the fragments from the
         chromosome;
      3. write each fragment to a FASTA file in a fresh temporary directory
         (registered in ``self.dirs_to_cleanup``);
      4. BLAST each fragment against the database at ``d.db_prefix``; the
         filtered top hit is appended to the profile (with a trailing ``'``
         when it is not forward), while fragments with no hit are recorded
         as ``'?'`` and their FASTA content is appended to
         ``self.new_fragments``;
      5. orientate the profile and compute the type with ``TypeGenerator``.

    When the type generator reports the profile has not been seen before,
    a line ``<db_dir>\\t<type>\\t<profile>`` is appended to
    ``self.novel_profiles``.

    Returns the string ``<type>\\t<profile>``.

    Raises ``subprocess.CalledProcessError`` if the barrnap command exits
    with a non-zero status.
    """
    import os  # local import: only needed to release the mkstemp descriptor

    # run the fasta through barrnap
    fd, barrnap_outputfile = mkstemp()
    # mkstemp hands back an already-open OS-level descriptor. Only the path
    # is used below, so close it immediately instead of leaking it.
    os.close(fd)
    # NOTE(review): the temporary file itself is left on disk, as before —
    # confirm whether it should be removed once the boundaries are read.

    b = Barrnap(input_file, self.threads)
    # NOTE(review): the command is executed through the shell; confirm that
    # construct_barrnap_command quotes the file names it embeds.
    subprocess.check_output(
        b.construct_barrnap_command(barrnap_outputfile), shell=True)
    boundries = b.read_barrnap_output(barrnap_outputfile)

    f = Fasta(input_file, is_circular=self.is_circular)
    fragments = f.calc_fragment_coords(boundries)
    f.populate_fragments_from_chromosome(fragments, self.max_bases_from_ends)

    tmpdir = mkdtemp()
    self.dirs_to_cleanup.append(tmpdir)
    ff = FragmentFiles(fragments, tmpdir)
    ff.create_fragment_fastas()

    # take each fasta file and blast it against the database
    blast = Blast(d.db_prefix, self.threads)
    gat_profile = GATProfile(fragments=[])
    for fasta_file in ff.output_filenames:
        blast_results = blast.run_blast(fasta_file)
        fb = FilterBlast(blast_results, self.min_bit_score,
                         self.min_alignment_length)
        top_result = fb.return_top_result()

        if top_result is None:
            # No acceptable hit: mark the fragment unknown and keep its
            # sequence in the new-fragments file.
            gat_profile.fragments.append('?')
            with open(fasta_file, "r") as fasta_file_fh, \
                    open(self.new_fragments, "a+") as newfrag_fh:
                newfrag_fh.write(fasta_file_fh.read())
            continue

        self.top_results.append(top_result)
        if top_result.is_forward():
            gat_profile.fragments.append(str(top_result.subject))
        else:
            gat_profile.fragments.append(str(top_result.subject) + '\'')

    gat_profile.orientate_for_dnaA()

    # lookup the gat_profile to get the number
    tg = TypeGenerator(p, gat_profile)
    type_output_string = tg.calculate_type() + "\t" + str(gat_profile)

    if not tg.has_previously_seen:
        with open(self.novel_profiles, "a+") as output_fh:
            output_fh.write(self.db_dir + "\t" + type_output_string + "\n")

    return type_output_string
def read_profiles(self):
    """Read the profiles stored in the tab-delimited file ``self.input_file``.

    The first line is treated as a header and skipped. Every later row with
    more than two columns becomes a ``GATProfile`` whose ``gat_number`` is
    the first column and whose fragments are the remaining non-empty
    columns; each profile is orientated with ``orientate_for_dnaA()``.

    Returns the list of profiles in file order. An empty file yields an
    empty list.
    """
    profiles = []
    with open(self.input_file, newline='') as csvfile:
        profile_reader = csv.reader(csvfile, delimiter='\t')
        # skip the header; the default keeps an empty file from raising
        # StopIteration out of this method
        next(profile_reader, None)

        for row in profile_reader:
            if len(row) <= 2:
                continue
            fragments = [cell for cell in row[1:] if cell != '']
            g = GATProfile(gat_number=row[0], fragments=fragments)
            g.orientate_for_dnaA()
            profiles.append(g)
    return profiles
def create_profiles(self):
    """Build the distinct, fully-known profiles listed in ``self.results_file``.

    The file is read as tab-delimited text. A row is used only when it has
    more than three columns and its second column matches ``GS<number>``.
    Rows whose fragment pattern contains an unknown fragment (``?`` or
    ``?'``) are skipped, and a profile whose string form has already been
    produced is not added again.

    Returns the list of ``GATProfile`` objects in file order.
    """
    # A set gives constant-time duplicate checks; only membership is needed.
    seen_profiles = set()
    profiles = []
    with open(self.results_file, newline='') as csvfile:
        profile_reader = csv.reader(csvfile, delimiter='\t')
        # Iterate the reader once. Nesting a second loop over the same
        # reader made the outer loop swallow the first row unprocessed.
        for row in profile_reader:
            if len(row) <= 3:
                continue

            # 1: directory of schema
            # 2: GS number with GS at the start
            # 3..N: fragment pattern
            m = re.match("GS(.+)", row[1])
            if not m:
                continue

            fragments = row[2:]
            if any(f in ('?', "?'") for f in fragments):
                continue

            g = GATProfile(self.verbose,
                           gat_number=m.group(1),
                           fragments=fragments)
            profile_key = str(g)
            if profile_key not in seen_profiles:
                seen_profiles.add(profile_key)
                profiles.append(g)
    return profiles
def type_generator(self):
    """Build a ``TypeGenerator`` for the dash-separated ``self.fragments``.

    The profile database is loaded from ``profile.txt`` inside
    ``self.db_dir``. The input profile takes its dnaA and dif fragment
    numbers from that database, is orientated with ``orientate_for_dnaA()``
    and is then checked with ``self.validate_profile``; the outcome of that
    check is passed on to the ``TypeGenerator``.

    Returns the constructed ``TypeGenerator``.
    """
    db_path = os.path.join(self.db_dir, 'profile.txt')
    known_profiles = Profiles(db_path, self.verbose)

    fragment_names = self.fragments.split('-')
    query_profile = GATProfile(
        self.verbose,
        fragments=fragment_names,
        dnaA_fragment_number=known_profiles.dnaA_fragment_number,
        dif_fragment_number=known_profiles.dif_fragment_number)
    query_profile.orientate_for_dnaA()

    profile_ok = self.validate_profile(known_profiles, query_profile)
    return TypeGenerator(known_profiles, query_profile, self.verbose,
                         profile_ok)
def test_type_generator_valid(self):
    """Check the order and the type computed for one profile.

    Against ``initial_profiles.txt`` with prefix ``GS``, the profile is
    expected to give orientationless order 2 and type ``GS2.122``.
    """
    profile_path = os.path.join(data_dir, 'initial_profiles.txt')
    known_profiles = Profiles(profile_path, False)
    query = GATProfile(
        False,
        fragments=['1', '7\'', '6\'', '4\'', '5\'', '3', '2\''])
    generator = TypeGenerator(known_profiles, query, False, prefix='GS')

    self.assertEqual(2, generator.calculate_orientationless_order())
    self.assertEqual('GS2.122', generator.calculate_type())
def test_type_generator_valid_seen(self):
    """Check order, quality and type for the profile 1-2-3-5-4-6-7.

    Against ``initial_profiles.txt`` with prefix ``GS``, the expected
    results are orientationless order 2, quality ``GREEN`` and type
    ``GS2.0``.
    """
    profile_path = os.path.join(data_dir, 'initial_profiles.txt')
    known_profiles = Profiles(profile_path, False)
    query = GATProfile(
        False, fragments=['1', '2', '3', '5', '4', '6', '7'])
    generator = TypeGenerator(known_profiles, query, False, True,
                              prefix='GS')

    self.assertEqual(2, generator.calculate_orientationless_order())
    self.assertEqual('GREEN', generator.quality)
    self.assertEqual('GS2.0', generator.calculate_type())
def calc_type(self):
    """Return the type calculated for the dash-separated ``self.fragments``.

    The profile database is loaded from ``profile.txt`` inside
    ``self.db_dir`` and handed, together with the input profile, to a
    ``TypeGenerator`` whose ``calculate_type()`` result is returned.
    """
    db_path = os.path.join(self.db_dir, 'profile.txt')
    known_profiles = Profiles(db_path)
    query_profile = GATProfile(fragments=self.fragments.split('-'))
    return TypeGenerator(known_profiles, query_profile).calculate_type()
def test_inverted(self):
    """Check orientation detection, inversion and re-orientation of a profile.

    The profile 1, 5, 7, 3' must be reported as not in the correct
    orientation, invert to 1', 3, 7', 5', and be reported as correctly
    orientated once ``orientate_for_dnaA()`` has run.
    """
    profile = GATProfile(fragments=['1', '5', '7', '3\''])

    self.assertFalse(profile.is_profile_in_correct_orientation())

    expected_inverted = ["1'", '3', "7'", "5'"]
    self.assertEqual(expected_inverted, profile.invert_fragments())

    profile.orientate_for_dnaA()
    self.assertTrue(profile.is_profile_in_correct_orientation())
def calculate_orientationless_order(self):
    """Return the database order matching ``self.gat_profile``, ignoring orientation.

    The orientationless form of the profile is looked up first with
    ``find_order_orientationless``. Only when that lookup does not return a
    positive order is the inverted orientationless form tried.

    Returns the first positive order found, otherwise 0.
    """
    forward_profile = GATProfile(
        self.verbose,
        fragments=self.gat_profile.orientationless_fragments())
    forward_order = self.find_order_orientationless(forward_profile)
    if forward_order > 0:
        return forward_order

    # invert it
    inverted_profile = GATProfile(
        self.verbose,
        fragments=forward_profile.inverted_orientationless_fragments())
    inverted_order = self.find_order_orientationless(inverted_profile)
    return inverted_order if inverted_order > 0 else 0
def test_gat_profile(self):
    """The profile 1, 5, 7, 3 must be reported as in the correct orientation."""
    profile = GATProfile(fragments=['1', '5', '7', '3'])
    in_correct_orientation = profile.is_profile_in_correct_orientation()
    self.assertTrue(in_correct_orientation)
def test_orientation_binary(self):
    """Check ``orientation_binary()`` for a table of fragment lists.

    Each case pairs a fragment list with the value the method is expected
    to return for it; cases run in order and stop at the first failure.
    """
    cases = (
        (['1\'', '5', '7', '3'], 1),
        (['1', '5', '7', '2\''], 2),
        (['1', '3\'', '7', '2'], 4),
        (['1', '3', '4\'', '2'], 8),
        (['1\'', '3\'', '4\'', '2\''], 15),
    )
    for fragment_names, expected in cases:
        profile = GATProfile(fragments=fragment_names)
        self.assertEqual(expected, profile.orientation_binary())
def test_reorientate(self):
    """``orientationless_fragments()`` of 1, 5, 7, 3' must give 1, 5, 7, 3."""
    # NOTE(review): another definition named test_reorientate follows in this
    # source. If both live in the same class, the later one replaces this one
    # and this test never runs — confirm.
    profile = GATProfile(fragments=['1', '5', '7', '3\''])
    expected = ['1', '5', '7', '3']
    self.assertEqual(expected, profile.orientationless_fragments())
def test_reorientate(self):
    """Check ``reorientate_list_to_start_with_one`` on a six-element list.

    The list 2, 5, 6, 1, 3, 7 is expected to come back as 1, 3, 7, 2, 5, 6.
    """
    profile = GATProfile(fragments=[])
    rotated = profile.reorientate_list_to_start_with_one(
        ['2', '5', '6', '1', '3', '7'])
    self.assertEqual(['1', '3', '7', '2', '5', '6'], rotated)