def test_sp_seq_d():
    """Check that ``sp_seq_d`` is consistent with the species dictionary.

    Builds a ``PhyscraperScrape`` from the precooked pickles, reads the
    precooked BLAST files and then compares two pairs of counts:

    * keys of ``sp_seq_d`` without a ``.`` against the taxon names collected
      from ``sp_d`` (entries not filtered out and already blasted), and
    * keys of ``sp_seq_d`` containing a ``.`` against the accessions
      collected from the dictionary returned by ``make_sp_dict()``.
    """
    absworkdir = os.path.abspath(workdir)
    conf = ConfigObj(configfi, interactive=False)

    # NOTE: pickle is only acceptable here because the files are fixtures
    # shipped with the test data; never unpickle untrusted input.
    # Context managers make sure the fixture handles are closed again.
    with open("tests/data/precooked/tiny_dataobj.p", 'rb') as handle:
        data_obj = pickle.load(handle)
    data_obj.workdir = absworkdir
    ids = IdDicts(conf, workdir=data_obj.workdir)
    with open("tests/data/precooked/tiny_acc_map.p", "rb") as handle:
        ids.acc_ncbi_dict = pickle.load(handle)

    filteredScrape = PhyscraperScrape(data_obj, ids)
    filteredScrape._blasted = 1
    blast_dir = "tests/data/precooked/fixed/tte_blast_files"
    filteredScrape.read_blast_wrapper(blast_dir=blast_dir)
    filteredScrape.remove_identical_seqs()
    filteredScrape.sp_dict(downtorank)
    filteredScrape.seq_filter = [
        'deleted', 'subsequence,', 'not', "removed", "deleted,"
    ]
    otu_dict = filteredScrape.data.otu_dict

    # Expected accession-keyed entries: OTUs whose status does not start
    # with one of the "not added" words and that carry a '^ncbi:gi' field.
    not_added = ['deleted', 'subsequence,', 'not']
    gi_sp_d = []
    sp_d = filteredScrape.make_sp_dict()
    for otu_ids in sp_d.values():
        for otu_id in otu_ids:
            otu = otu_dict[otu_id]
            if '^physcraper:status' not in otu:
                continue
            if otu['^physcraper:status'].split(' ')[0] in not_added:
                continue
            if '^ncbi:gi' in otu:
                gi_sp_d.append(otu['^ncbi:accession'])

    # Expected name-keyed entries: OTUs that pass ``seq_filter`` and whose
    # last-blasted date is not the '1800/01/01' placeholder value.
    user_sp_d = []
    for otu_ids in filteredScrape.sp_d.values():
        for otu_id in otu_ids:
            otu = otu_dict[otu_id]
            if '^physcraper:status' not in otu:
                continue
            status_word = otu['^physcraper:status'].split(' ')[0]
            if status_word in filteredScrape.seq_filter:
                continue
            if otu['^physcraper:last_blasted'] == '1800/01/01':
                continue
            if '^user:TaxonName' in otu:
                user_sp_d.append(otu['^user:TaxonName'])
            elif '^ot:ottTaxonName' in otu:
                user_sp_d.append(otu['^ot:ottTaxonName'])

    filteredScrape.make_sp_seq_dict()

    # Split the keys actually present in ``sp_seq_d``: keys with a '.' are
    # counted on the accession side (presumably versioned accessions -
    # TODO confirm), everything else on the taxon-name side.
    gi_sp_seq_d = []
    ott_sp_seq_d = []
    for seq_dict in filteredScrape.sp_seq_d.values():
        for seq_key in seq_dict:
            if len(seq_key.split('.')) >= 2:
                gi_sp_seq_d.append(seq_key)
            else:
                ott_sp_seq_d.append(seq_key)

    assert len(ott_sp_seq_d) == len(user_sp_d)
    assert len(gi_sp_seq_d) == len(gi_sp_d)
def test_read_local_blast():
    """Run the local filter BLAST for one taxon and check its output file.

    Builds a ``PhyscraperScrape`` from the precooked pickles, picks the first
    taxon in ``sp_d`` whose ``sp_seq_d`` entry holds more than ``treshold``
    sequences, writes one of its sequences as the query and the remaining
    '.'-containing keys as the database, runs the filter BLAST and, if the
    XML output exists, asserts that its first line is not blank.
    """
    # Resolved locally so the test does not rely on a module-level
    # ``absworkdir`` binding.
    absworkdir = os.path.abspath(workdir)
    conf = physcraper.ConfigObj(configfi, interactive=False)

    # NOTE: pickle is only acceptable here because the files are fixtures
    # shipped with the test data; never unpickle untrusted input.
    # Context managers make sure the fixture handles are closed again.
    with open("tests/data/precooked/tiny_dataobj.p", 'rb') as handle:
        data_obj = pickle.load(handle)
    data_obj.workdir = absworkdir
    ids = physcraper.IdDicts(conf, workdir=data_obj.workdir)
    with open("tests/data/precooked/tiny_acc_map.p", "rb") as handle:
        ids.acc_ncbi_dict = pickle.load(handle)

    filteredScrape = PhyscraperScrape(data_obj, ids)
    filteredScrape._blasted = 1
    blast_dir = "tests/data/precooked/fixed/tte_blast_files"
    filteredScrape.read_blast_wrapper(blast_dir=blast_dir)
    filteredScrape.remove_identical_seqs()
    filteredScrape.sp_dict(downtorank)
    filteredScrape.make_sp_seq_dict()

    for taxonID in filteredScrape.sp_d:
        taxon_seqs = filteredScrape.sp_seq_d[taxonID]
        if len(taxon_seqs) > treshold:
            # Materialise the keys once: dict views cannot be indexed or
            # sliced on Python 3, and one snapshot keeps the query key and
            # the database keys consistent with each other.
            seq_keys = list(taxon_seqs)
            blast_seq = seq_keys[0]
            local_blast.write_filterblast_query(filteredScrape.workdir,
                                                taxonID,
                                                taxon_seqs[blast_seq],
                                                fn=str(taxonID))
            # Only keys containing a '.' go into the BLAST database.
            blast_db = [
                item for item in seq_keys[1:]
                if len(item.split(".")) >= 2
            ]
            for blast_key in blast_db:
                local_blast.write_filterblast_db(filteredScrape.workdir,
                                                 blast_key,
                                                 taxon_seqs[blast_key],
                                                 fn=str(taxonID))
            # One taxon is enough for this test.
            break

    # ``taxonID`` still names the taxon the loop stopped on; the query and
    # database files were both written under that name (``fn=str(taxonID)``).
    key = taxonID
    local_blast.run_filter_blast(filteredScrape.workdir, key, key)
    local_blast.read_filter_blast(filteredScrape.workdir,
                                  filteredScrape.sp_seq_d[key], key)

    blast_out = "{}/blast/output_{}_tobeblasted.xml".format(workdir, key)
    # NOTE(review): if the output file is missing the test passes without
    # asserting anything - confirm whether that is intended.
    if os.path.exists(blast_out):
        with open(blast_out) as f:
            first_line = f.readline()
        assert len(first_line.strip()) != 0