def restore_template():
    """
    @brief: round-trip check of the template pipeline: compute a matching
            template, store it as CSV, load it back, restore it and save the
            restored matching info
    """
    gen_record = ld.load_bio_files(["Data/species_bold_own_genbank.fasta"])
    # Debug toggle: restrict the run to a single gen
    #gen_record = {"AGB001-11_Salticus_scenicus_BOLD": gen_record["AGB001-11_Salticus_scenicus_BOLD"]}
    primer_pairs = ld.load_csv_file("Data/P&PP.csv")
    # Debug toggle: restrict the run to a single primer pair
    #primer_pairs = [primer_pairs[5]]

    # Only the template of the first computation is used; the other three
    # results are overwritten by the restore step below.
    template, _, _, _ = m.compute_gen_matching(5, 5, primer_pairs,
                                               gen_record, 0)

    # The stored header is TEMPLATE_HEADER; a hard-coded local header list
    # used to be built here but was never passed to anything.
    m.store_matching_results("Test_data/test1.csv",
                             template,
                             header=TEMPLATE_HEADER)

    templateR = ld.load_template("Test_data/test1.csv")
    templateR, discarded, rs, cs = ld.restore_template(
        templateR, gen_record, primer_pairs, 10)

    i.save_matching_info("Test_data/test2",
                         templateR,
                         discarded,
                         rs,
                         cs,
                         header=TEMPLATE_HEADER)
def pandas_scalability_test():
    """
    @brief: check if time is linearly proportional to matrix size

    Times one m.MATCH_TABLE.loc lookup for the longest record of the large
    file and one for a fixed short record, then prints both elapsed times in
    nanoseconds divided by the respective record length.

    Raises ValueError (from max) when the large record set is empty.
    """
    gen_record_large = ld.load_bio_files(
        ["Data/mitochondrion.1.1.genomic.fna"])
    gen_record = ld.load_bio_files(["Data/species_bold_own_genbank.fasta"])
    primer_pairs = ld.load_csv_file("Data/P&PP.csv")

    # Longest record of the large file (first one wins on ties)
    largest_key = max(gen_record_large,
                      key=lambda gen_key: len(gen_record_large[gen_key]))
    max_len = len(gen_record_large[largest_key])

    primer = primer_pairs[4].f

    # perf_counter is the monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump with clock adjustments.
    #largest_matrix len= 2M*primer_len
    gen = gen_record_large[largest_key]
    start = time.perf_counter()
    _ = m.MATCH_TABLE.loc[gen, primer]
    elapsed_large = time.perf_counter() - start

    #len = 658*primer_len
    gen = gen_record["ACEA563-14_Aphis_gossypii_BOLD"]
    len_s = len(gen)
    start = time.perf_counter()
    _ = m.MATCH_TABLE.loc[gen, primer]
    elapsed_small = time.perf_counter() - start

    # Nanoseconds per element for the large and the small lookup
    print(int(elapsed_large * (10**9) / max_len),
          int(elapsed_small * (10**9) / len_s))
def load_primer_pairs(primer_pairs_file):
    """
    @brief: load the primer pairs file, logging success or failure
    @param primer_pairs_file: value handed to ld.load_csv_file
    @return: the result of ld.load_csv_file, or None when loading failed
    """
    try:
        primer_pairs = ld.load_csv_file(primer_pairs_file)
    except Exception:
        # Best-effort loader: report the failure (with its traceback) and let
        # the caller deal with None. KeyboardInterrupt / SystemExit are no
        # longer swallowed, unlike with a bare except.
        logging.exception("At loading primer pairs file")
        return None

    logging.info("Primer pairs file loaded!")
    return primer_pairs
def check_if_multiple_alignments_are_frequent():
    """
    @brief: print every gen / primer pair combination whose alignment list
            holds more than one alignment (gen id, primer pair id, count)
    """
    gen_record = ld.load_bio_files(["Data/species_bold_own_genbank.fasta"])
    primer_pairs = ld.load_csv_file("Data/P&PP.csv")

    # NOTE(review): other callers in this file pass a fifth positional
    # argument to compute_gen_matching and unpack four values from its result,
    # while this loop treats the result as an iterable of objects exposing
    # get_matching_list() -- confirm this still matches the current API.
    gen_alignment_list = m.compute_gen_matching(5, 5, primer_pairs,
                                                gen_record)

    for gm in gen_alignment_list:
        for al_list in gm.get_matching_list():
            alignments = al_list.get_list()
            # The comparison belongs outside len(): the previous
            # len(alignments > 1) compared the container itself against 1.
            if len(alignments) > 1:
                print(al_list.gen.id, al_list.primer_pair.id,
                      len(alignments))
def matching_test():
    """
    @brief: load the small test inputs with logging enabled and print one
            23-element slice of a fixed gen plus the reverse primer of pair
            "10"; the full matching pipeline is currently disabled
    """
    init_logger()
    set_verbosity(True)

    gen_record = ld.load_bio_files(["test_input/sbog_test.fasta"])
    # Debug toggles for shrinking the gen set:
    #gen_record = split(gen_record, 0.005)
    #gen_record = {"AGB001-11_Salticus_scenicus_BOLD": gen_record["AGB001-11_Salticus_scenicus_BOLD"]}
    primer_pairs = ld.load_csv_file("test_input/PP.csv")
    # Debug toggle for a single primer pair:
    #primer_pairs = {"6":primer_pairs["6"]}

    # output and header are only consumed by the disabled pipeline below.
    output = "Test_data/output"
    header = list(range(len(TEMPLATE_HEADER)))
    #template, discarded, rs, cs = m.compute_gen_matching(5, 5, primer_pairs, gen_record, output)
    #m.store_matching_results("Test_data/test1.csv", template, header=TEMPLATE_HEADER)
    #i.save_matching_info("TEST", output, template, header, discarded, rs, cs)

    close_logger()

    window_start = 550
    window_end = window_start + 23
    sample_gen = gen_record["ACEA1016-14_Aphis_spiraecola_BOLD"]
    print(sample_gen[window_start:window_end])

    reverse_primer = primer_pairs["10"].r
    print("".join(reverse_primer.seq))
    return None
def _report_pair_issue(genid, pair_id, message):
    """
    @brief: print the three-line report emitted by test_all_pairs whenever a
            check fails for one alignment
    """
    print(genid)
    print("PRIMER PAIR: ", pair_id)
    print(message)


def test_all_pairs():
    """
    @brief: compare the computed matching table against the trusted results
            CSV and report amplicon / forward / reverse mismatch regressions

    For every computed row that has a trusted counterpart (same primer pair,
    fastaid containing genid[4:-1]) the amplicon must lie inside the primer
    pair's range and equal the trusted 'long' value, and the computed
    mismFT / mismRT must not exceed the trusted ones. Rows passing every
    check are written to Test_data/correct_alignments.csv. Prints the
    counters and a final SUCCESS! / TEST FAILED verdict.
    """
    # Columns of the trusted CSV:
    #"primerPair","id","fastaid","organism","subgrup","primerF","primerR","mismFT","mismRT","mismTT","mismF3","mismR3","mismT3","long"
    trusted_results = pd.read_csv(
        "Test_data/mismatches_allPrimers_allMitochondria.csv", sep=',')

    check_names = ("amplicon", "missf", "missr")
    # 1 while no processed alignment has failed that check, 0 afterwards
    global_check = dict.fromkeys(check_names, 1)

    gen_record = ld.load_bio_files(["Data/mitochondrion.1.1.genomic.fna"],
                                   writable=True)
    # presumably reduces the record set to a fraction -- confirm against split
    gen_record = split(gen_record, 0.01)
    primer_pairs = ld.load_csv_file("Data/P&PP.csv")
    result = m.compute_gen_matching(5,
                                    5,
                                    primer_pairs,
                                    gen_record,
                                    0,
                                    hanging_primers=True)
    gen_matching_table = result[0]

    correct_alignments = pd.DataFrame(columns=TEMPLATE_HEADER)
    info = {
        "total_gens": len(gen_record),
        "matches_skipped": 0,
        "alignments_processed": 0,
        "multiple_alignment_cases": 0,
        "better_alignments": 0
    }

    # iterrows yields each row by label, avoiding the former mix of
    # positional (.iloc) and label (.loc) access with the same index value.
    for _, row in gen_matching_table.iterrows():
        genid = row.loc["fastaid"]
        pair_key = row.loc["primerPair"]

        # NOTE(review): str.contains treats genid[4:-1] as a regular
        # expression; if ids can hold regex metacharacters, confirm whether
        # regex=False is wanted here.
        target = trusted_results.loc[
            trusted_results['fastaid'].str.contains(genid[4:-1])]
        target = target.loc[target['primerPair'] == int(pair_key)]
        if target.empty:
            # No trusted counterpart: nothing to compare against
            info["matches_skipped"] += 1
            continue

        pp = primer_pairs[pair_key]
        info["alignments_processed"] += 1

        # Per-alignment verdict. It must start fresh for every row, otherwise
        # a single failure would keep all later rows out of
        # correct_alignments.
        check = dict.fromkeys(check_names, 1)

        amplicon = row.loc["amplicon"]
        if amplicon < pp.min_amplicon or amplicon > pp.max_amplicon:
            _report_pair_issue(genid, pp.id, "Amplicon outside range")
            print(amplicon, pp.min_amplicon, pp.max_amplicon)
            global_check["amplicon"] = check["amplicon"] = 0
        if amplicon != target['long'].iat[0]:
            _report_pair_issue(genid, pp.id, "Amplicon not matching")
            global_check["amplicon"] = check["amplicon"] = 0

        fm = target['mismFT'].iat[0]
        rm = target['mismRT'].iat[0]
        if row.loc["mismFT"] > fm:
            _report_pair_issue(genid, pp.id, "Bad forward matching")
            global_check["missf"] = check["missf"] = 0
        if row.loc["mismRT"] > rm:
            _report_pair_issue(genid, pp.id, "Bad reverse matching")
            global_check["missr"] = check["missr"] = 0

        if all(check.values()):
            # Every check passed: keep the row and note whether it beats the
            # trusted result on total mismatches.
            correct_alignments.loc[correct_alignments.shape[0]] = row
            if row.loc["mismFT"] + row.loc["mismRT"] < fm + rm:
                info["better_alignments"] += 1

    for info_key, value in info.items():
        print(info_key, value)

    # The verdict is taken from the accumulated checks, not from the verdict
    # of the last processed row.
    if all(global_check.values()):
        print("SUCCESS!")
    else:
        print("TEST FAILED")

    correct_alignments.to_csv("Test_data/correct_alignments.csv",
                              index_label="id")
    # Disabled extra outputs:
    #store_results("Test_data/better_alignments.csv", better_alignment_list, header)
    #store_results("Test_data/not_tested_alignments.csv", not_tested_alignment_list, header)
    #ld.store_matching_results("Test_data/full_alignments.csv", gen_alignment_list, header)