def test_0():
    """Build — or load from the precooked pickle cache — the shared data,
    id-dict, and scraper objects used by the later opentree tests.

    Unmapped taxa are kept ('keep') so test_unmapped can compare against a
    'remove' run.
    """
    # NOTE(review): `scraper`/`num_keep` are assigned as locals here but
    # test_unmapped reads module-level names of the same spelling — confirm a
    # `global` declaration was not lost when this file was reformatted.
    precooked_scraper = "tests/data/precooked/otol_scraper.p"
    if os.path.isfile(precooked_scraper):
        conf = physcraper.ConfigObj(configfi, interactive=False)
        conf.unmapped = 'keep'
        data_obj = pickle.load(open("tests/data/precooked/otol_tiny_dataobj.p", 'rb'))
        data_obj.workdir = absworkdir
        ids = physcraper.IdDicts(conf, workdir=data_obj.workdir)
        ids.acc_ncbi_dict = pickle.load(open("tests/data/precooked/otol_tiny_gi_map.p", "rb"))
        scraper = pickle.load(open(precooked_scraper, "rb"))
        num_keep = len(scraper.data.aln.taxon_namespace)
    else:
        # No cache: rebuild everything from phylesystem and pickle it for next time.
        sys.stdout.write("\n\n No files present\n\n")
        conf = physcraper.ConfigObj(configfi)
        conf.unmapped = 'keep'
        aln = DnaCharacterMatrix.get(path=seqaln, schema=mattype)
        data_obj = physcraper.generate_ATT_from_phylesystem(
            aln=aln,
            workdir=workdir,
            study_id=study_id,
            tree_id=tree_id,
            phylesystem_loc=conf.phylesystem_loc)
        pickle.dump(data_obj, open("tests/data/precooked/otol_tiny_dataobj.p", "wb"))
        ids = physcraper.IdDicts(conf, workdir=workdir)
        pickle.dump(ids.acc_ncbi_dict, open("tests/data/precooked/otol_tiny_gi_map.p", "wb"))
        data_obj.write_files()
        scraper = physcraper.PhyscraperScrape(data_obj, ids)
        pickle.dump(scraper.config, open("tests/data/precooked/otol_conf.p", "wb"))
        pickle.dump(scraper, open(precooked_scraper, "wb"))
        num_keep = len(scraper.data.aln.taxon_namespace)
def test_prune_short():
    """Pruning sequences shorter than 90% of the alignment length must shrink
    the tree's taxon namespace."""
    if not os.path.exists(workdir):
        os.makedirs(workdir)
    conf = physcraper.ConfigObj(configfi, interactive=False)
    ids = physcraper.IdDicts(conf, workdir=workdir)
    # Reuse a cached OTU-json mapping when present; otherwise build and cache it.
    if os.path.exists(otu_jsonfi):
        otu_json = json.load(open(otu_jsonfi))
    else:
        otu_json = physcraper.OtuJsonDict(id_to_spn, ids)
        json.dump(otu_json, open(otu_jsonfi, "w"))
    data_obj = physcraper.generate_ATT_from_files(seqaln=seqaln,
                                                  mattype=mattype,
                                                  workdir=workdir,
                                                  treefile=treefile,
                                                  schema_trf=schema_trf,
                                                  otu_json=otu_jsonfi,
                                                  ingroup_mrca=None)
    taxa_before = len(data_obj.tre.taxon_namespace)
    data_obj.prune_short(0.9)
    taxa_after = len(data_obj.tre.taxon_namespace)
    assert taxa_before > taxa_after
def test_run_filter_blast():
    """run_filter_blast() on a precooked query/db pair must produce a readable
    XML output file.

    Fixes vs. original: the final `open(blast_out)` leaked a file handle and
    the `if os.path.exists(...)` guard meant the test asserted nothing; the
    file is now opened in a context manager and its existence is asserted.
    """
    conf = physcraper.ConfigObj(configfi, interactive=False)
    data_obj = pickle.load(open("tests/data/precooked/tiny_dataobj.p", 'rb'))
    data_obj.workdir = absworkdir
    ids = physcraper.IdDicts(conf, workdir=data_obj.workdir)
    ids.acc_ncbi_dict = pickle.load(open("tests/data/precooked/tiny_acc_map.p", "rb"))
    filteredScrape = physcraper.FilterBlast(data_obj, ids)
    blast_db = "otuSlagascanus"
    blast_seq = "otuSlagascanus"
    # Stage the precooked blast inputs inside the scraper's workdir.
    if not os.path.exists("{}/blast".format(filteredScrape.data.workdir)):
        os.makedirs("{}/blast/".format(filteredScrape.data.workdir))
    path1 = '{}/tests/data/precooked/fixed/select-blast/*'.format(os.getcwd())
    path2 = "{}/blast/".format(filteredScrape.data.workdir)
    os.system('cp -r ' + path1 + ' ' + path2)
    local_blast.run_filter_blast(filteredScrape.data.workdir, blast_seq, blast_db)
    blast_out = "{}/blast/output_otuSlagascanus_tobeblasted.xml".format(workdir)
    assert os.path.exists(blast_out)
    # Open-and-close just to prove the output is a readable file.
    with open(blast_out):
        pass
def test_prune_short():
    """prune_short() driven by config.seq_len_perc must drop taxa from the tree."""
    if not os.path.exists(workdir):
        os.makedirs(workdir)
    conf = physcraper.ConfigObj(configfi, interactive=False)
    # 'remote' skips loading the ncbi names/nodes tables, which this test never uses.
    conf.blast_loc = 'remote'
    ids = physcraper.IdDicts(conf, workdir=workdir)
    # Reuse a cached OTU-json mapping when present; otherwise build and cache it.
    if os.path.exists(otu_jsonfi):
        otu_json = json.load(open(otu_jsonfi))
    else:
        otu_json = physcraper.OtuJsonDict(id_to_spn, ids)
        json.dump(otu_json, open(otu_jsonfi, "w"))
    data_obj = physcraper.generate_ATT_from_files(seqaln=seqaln,
                                                  mattype=mattype,
                                                  workdir=workdir,
                                                  config_obj=conf,
                                                  treefile=treefile,
                                                  schema_trf=schema_trf,
                                                  otu_json=otu_jsonfi,
                                                  ingroup_mrca=None)
    data_obj.config.seq_len_perc = 0.9
    taxa_before = len(data_obj.tre.taxon_namespace)
    data_obj.prune_short()
    taxa_after = len(data_obj.tre.taxon_namespace)
    assert taxa_before > taxa_after
def test_read_local_blast():
    """Exercise the full write-query / write-db / run / read round trip of the
    local filter-blast helpers against precooked blast output, then check the
    resulting XML file is non-empty.
    """
    conf = physcraper.ConfigObj(configfi, interactive=False)
    data_obj = pickle.load(open("tests/data/precooked/tiny_dataobj.p", 'rb'))
    data_obj.workdir = absworkdir
    ids = physcraper.IdDicts(conf, workdir=data_obj.workdir)
    ids.acc_ncbi_dict = pickle.load(
        open("tests/data/precooked/tiny_acc_map.p", "rb"))
    filteredScrape = PhyscraperScrape(data_obj, ids)
    filteredScrape._blasted = 1  # pretend blasting already happened
    blast_dir = "tests/data/precooked/fixed/tte_blast_files"
    # filteredScrape.acc_list_mrca = pickle.load(open("tests/data/precooked/acc_list_mrca.p", 'rb'))
    filteredScrape.read_blast_wrapper(blast_dir=blast_dir)
    filteredScrape.remove_identical_seqs()
    filteredScrape.sp_dict(downtorank)
    filteredScrape.make_sp_seq_dict()
    # print("prepare test")
    # Find the first taxon with more sequences than the cutoff and stage its
    # query/database files, then stop.
    # NOTE(review): `treshold` (sic) is presumably a module-level constant —
    # confirm; it is spelled `threshold` elsewhere in this file.
    for taxonID in filteredScrape.sp_d:
        if len(filteredScrape.sp_seq_d[taxonID]) > treshold:
            # print(taxonID)
            # Py2 idiom: dict.keys() returns a list, so [0]/[1:] indexing works.
            blast_seq = filteredScrape.sp_seq_d[taxonID].keys()[0]
            seq = filteredScrape.sp_seq_d[taxonID][blast_seq]
            local_blast.write_filterblast_query(filteredScrape.workdir, taxonID,
                                                seq, fn=str(taxonID))
            # print(filteredScrape.sp_seq_d[taxonID].keys()[1:] )
            # Keys that look like GenBank accessions (contain a '.') form the db.
            blast_db = [item for item in filteredScrape.sp_seq_d[taxonID].keys()[1:]
                        if len(item.split(".")) >= 2]
            # print(blast_db)
            for blast_key in blast_db:
                seq = filteredScrape.sp_seq_d[taxonID][blast_key]
                local_blast.write_filterblast_db(filteredScrape.workdir, blast_key,
                                                 seq, fn=str(taxonID))
            break
    # print(taxonID)
    # `taxonID` deliberately leaks out of the loop: it names the staged files.
    blast_db = taxonID
    blast_seq = taxonID
    key = taxonID
    local_blast.run_filter_blast(filteredScrape.workdir, blast_seq, blast_db)
    local_blast.read_filter_blast(filteredScrape.workdir,
                                  filteredScrape.sp_seq_d[key], blast_db)
    blast_out = "{}/blast/output_{}_tobeblasted.xml".format(workdir, key)
    if os.path.exists(blast_out):
        with open(blast_out) as f:
            first_line = f.readline()
            assert len(first_line.strip()) != 0
def test_select_seq_by_local_blast():
    """how_many_sp_to_keep() must keep exactly the number of sequences
    predicted by re-implementing the per-taxon threshold bookkeeping by hand:
    a new taxon contributes up to `threshold` query sequences; an already
    sampled taxon is only topped up to `threshold`.
    """
    conf = physcraper.ConfigObj(configfi, interactive=False)
    data_obj = pickle.load(open("tests/data/precooked/tiny_dataobj.p", 'rb'))
    data_obj.workdir = absworkdir
    ids = physcraper.IdDicts(conf, workdir=data_obj.workdir)
    ids.acc_ncbi_dict = pickle.load(open("tests/data/precooked/tiny_acc_map.p", "rb"))
    scrape = physcraper.FilterBlast(data_obj, ids)
    scrape.add_setting_to_self(downtorank, threshold)
    scrape._blasted = 1
    scrape.read_blast_wrapper(blast_dir="tests/data/precooked/fixed/tte_blast_files")
    scrape.remove_identical_seqs()
    scrape.sp_dict(downtorank)
    scrape.make_sp_seq_dict()
    # Hand-computed expectation mirroring the first half of how_many_sp_to_keep.
    expected = 0
    for tax_id in scrape.sp_d:
        counts = scrape.count_num_seq(tax_id)
        if counts["new_taxon"]:
            if counts["query_count"] < threshold:
                expected += counts["query_count"]
            if counts["query_count"] > threshold:
                expected += threshold
        if counts["new_taxon"] is False:
            if counts["query_count"] >= 1:
                if counts["seq_present"] < threshold:
                    expected += threshold - counts["seq_present"]
                if counts["seq_present"] > threshold:
                    expected += 0
    scrape.how_many_sp_to_keep(threshold, selectby)
    assert expected == len(scrape.filtered_seq) and expected > 0
    # (A long commented-out dump of example otu_dict entries — Senecio
    # accessions JX895389.1/JX895398.1/JX895353.1 etc. — was removed here.)
def test_write_outputinfo():
    """write_out_files() must produce otu_seq_info.csv and taxon_sampling.csv
    whose sixth line is a comma-delimited string with at least two fields.

    Fixes vs. original: the assertion `line.split(",") >= 2` compared a list
    to an int — vacuously True on Python 2 and a TypeError on Python 3 — so
    the test could never fail as intended; it now compares the field count.
    The two identical per-file checks are also deduplicated into one loop.
    """
    workdir = "tests/output/test_write_output_files"
    configfi = "tests/data/test.config"
    downtorank = None
    absworkdir = os.path.abspath(workdir)
    fn_otu = os.path.join(absworkdir, "otu_seq_info.csv")
    fn_sampling = os.path.join(absworkdir, "taxon_sampling.csv")
    conf = physcraper.ConfigObj(configfi, interactive=False)
    data_obj = pickle.load(open("tests/data/precooked/tiny_dataobj.p", 'rb'))
    data_obj.workdir = absworkdir
    ids = physcraper.IdDicts(conf, workdir=data_obj.workdir)
    ids.acc_ncbi_dict = pickle.load(
        open("tests/data/precooked/tiny_acc_map.p", "rb"))
    filteredScrape = PhyscraperScrape(data_obj, ids)
    filteredScrape._blasted = 1
    blast_dir = "tests/data/precooked/fixed/tte_blast_files"
    filteredScrape.read_blast_wrapper(blast_dir=blast_dir)
    filteredScrape.remove_identical_seqs()
    filteredScrape.align_query_seqs()
    wrappers.write_out_files(filteredScrape, downtorank)
    for fn_path in (fn_otu, fn_sampling):
        with open(fn_path) as fn:
            # Skip the header and read the sixth line of the file.
            line = fn.readline()
            cnt = 1
            while cnt <= 5:
                line = fn.readline()
                cnt += 1
            assert isinstance(line, str)
            assert len(line.split(",")) >= 2
def test_unmapped():
    """Building a scraper with unmapped='remove' must yield fewer alignment
    taxa than the 'keep' scraper built by test_0, and every kept taxon in the
    'keep' run must carry an OTT id."""
    conf = physcraper.ConfigObj(configfi, interactive=False)
    conf.unmapped = 'remove'
    data_obj = pickle.load(open("tests/data/precooked/otol_tiny_dataobj.p", 'rb'))
    data_obj.workdir = absworkdir
    ids = physcraper.IdDicts(conf, workdir=data_obj.workdir)
    scraper2 = physcraper.PhyscraperScrape(data_obj, ids)
    num_remove = len(scraper2.data.aln.taxon_namespace)
    # `scraper` / `num_keep` come from the earlier 'keep' run (module level).
    mapped = sum(1 for tax in scraper.data.aln.taxon_namespace
                 if '^ot:ottId' in scraper.data.otu_dict[tax.label])
    assert num_remove <= num_keep - 1
    assert num_keep == mapped
sys.stdout.write("\nTesting 'opentree scrape (1 round)'\n") conf = physcraper.ConfigObj(configfi, interactive=False) print "1. {}".format(conf.email) aln = DnaCharacterMatrix.get(path=seqaln, schema=mattype) data_obj = physcraper.generate_ATT_from_phylesystem(aln=aln, workdir=workdir, study_id = study_id, tree_id = tree_id, phylesystem_loc = conf.phylesystem_loc) ids = physcraper.IdDicts(conf, workdir=workdir) print "3. {}".format(ids.config.email) data_obj.prune_short() assert len(data_obj.aln) == 9 data_obj.write_files() try: scraper = physcraper.PhyscraperScrape(data_obj, ids) scraper.run_blast_wrapper() scraper.read_blast_wrapper() scraper.remove_identical_seqs() scraper.generate_streamed_alignment() sys.stdout.write("\nTest opentree_scrape.py (round 1) passed\n")
def test_calculate_mean_sd():
    """local_blast.calculate_mean_sd() must agree with a hand-computed mean and
    (population) standard deviation of the hsp bit scores parsed from a local
    blast run.

    Fixes vs. original: the XML handle was never closed (now a context
    manager), and the working-directory change was not undone if
    run_filter_blast raised (now restored in a finally block).
    """
    conf = physcraper.ConfigObj(configfi, interactive=False)
    data_obj = pickle.load(open("tests/data/precooked/tiny_dataobj.p", 'rb'))
    data_obj.workdir = absworkdir
    ids = physcraper.IdDicts(conf, workdir=data_obj.workdir)
    ids.acc_ncbi_dict = pickle.load(
        open("tests/data/precooked/tiny_acc_map.p", "rb"))
    filteredScrape = physcraper.FilterBlast(data_obj, ids)
    # Partly a copy of read_local_blast_query: run blast on a fixed input pair.
    fn = 'Senecio_scopolii_subsp._scopolii'
    general_wd = os.getcwd()
    if not os.path.exists(os.path.join(filteredScrape.workdir, "blast")):
        os.makedirs(os.path.join(filteredScrape.workdir, "blast"))
    fn_path = os.path.abspath('./tests/data/precooked/fixed/local-blast/{}'.format(fn))
    print(fn_path)
    output_blast = os.path.join(filteredScrape.workdir,
                                "blast/output_{}.xml".format(fn))
    os.chdir(os.path.join(filteredScrape.workdir, "blast"))
    try:
        local_blast.run_filter_blast(filteredScrape.workdir, fn_path, fn_path,
                                     output=output_blast)
    finally:
        os.chdir(general_wd)
    # Collect every hsp's scores, keyed by gi number, and sum the bit scores.
    hsp_scores = {}
    add_hsp = 0
    with open(output_blast) as xml_file:
        for record in NCBIXML.parse(xml_file):
            for alignment in record.alignments:
                for hsp in alignment.hsps:
                    gi = int(alignment.title.split(" ")[1])
                    hsp_scores[gi] = {"hsp.bits": hsp.bits,
                                      "hsp.score": hsp.score,
                                      "alignment.length": alignment.length,
                                      "hsp.expect": hsp.expect}
                    add_hsp = add_hsp + float(hsp.bits)
    # Reference computation: mean and population standard deviation.
    mean_sed = local_blast.calculate_mean_sd(hsp_scores)
    sum_hsp = len(hsp_scores)
    mean = add_hsp / sum_hsp
    sd_all = 0
    for item in hsp_scores:
        val = hsp_scores[item]["hsp.bits"]
        sd_all += (val - mean) * (val - mean)
    sd_val = sqrt(sd_all / sum_hsp)
    assert round(sd_val, 4) == round(mean_sed['sd'], 4)
    assert round(mean, 4) == round(mean_sed['mean'], 4)
def test_select_seq_by_local_blast():
    """Check how_many_sp_to_keep() against a hand-maintained count, including
    the correction applied when fewer blast hits are available than the
    threshold requires.

    The second half is deliberately a copy of the internals of
    select_seq_by_local_blast, so changes there must be mirrored here.
    """
    conf = physcraper.ConfigObj(configfi, interactive=False)
    data_obj = pickle.load(open("tests/data/precooked/tiny_dataobj.p", 'rb'))
    data_obj.workdir = absworkdir
    ids = physcraper.IdDicts(conf, workdir=data_obj.workdir)
    ids.acc_ncbi_dict = pickle.load(open("tests/data/precooked/tiny_acc_map.p", "rb"))
    filteredScrape = FilterBlast(data_obj, ids)
    filteredScrape.add_setting_to_self(downtorank, threshold)
    filteredScrape._blasted = 1  # pretend blasting already happened
    blast_dir = "tests/data/precooked/fixed/tte_blast_files"
    # filteredScrape.acc_list_mrca = pickle.load(open("tests/data/precooked/acc_list_mrca.p", 'rb'))
    filteredScrape.read_blast_wrapper(blast_dir=blast_dir)
    filteredScrape.remove_identical_seqs()
    filteredScrape.sp_dict(downtorank)
    filteredScrape.make_sp_seq_dict()
    # First part of the expected count: if threshold exceeds the number of
    # sequences for a species, all of them are added.
    count = 0
    for tax_id in filteredScrape.sp_d:
        count_dict = filteredScrape.count_num_seq(tax_id)
        if count_dict["new_taxon"]:
            if count_dict["query_count"] < threshold:
                count += count_dict["query_count"]
            if count_dict["query_count"] > threshold:
                count += threshold
        if count_dict["new_taxon"] is False:
            if count_dict["query_count"] >= 1:
                if count_dict["seq_present"] < threshold:
                    count += threshold - count_dict["seq_present"]
                if count_dict["seq_present"] > threshold:
                    count += 0
    # `count` reflects what should be added, but the threshold can cap the
    # actual number lower — correct for that by replaying the selection.
    # Copied from select_seq_by_local_blast:
    for tax_id in filteredScrape.sp_d:
        count_dict = filteredScrape.count_num_seq(tax_id)
        seq_present = count_dict["seq_present"]
        query_count = count_dict["query_count"]
        new_taxon = count_dict["new_taxon"]  # unused; kept to mirror the library code
        seq_d = filteredScrape.sp_seq_d[tax_id]
        fn = tax_id
        count2 = seq_present
        if seq_present < threshold and query_count > 1:
            # Below threshold and more than one sequence to blast.
            # print(tax_id, query_count)
            # print(filteredScrape.sp_seq_d[tax_id].keys())
            # Py2 idiom: dict.keys() returns a list, so [0]/[1:] indexing works.
            blast_seq_id = filteredScrape.sp_seq_d[tax_id].keys()[0]
            seq = filteredScrape.sp_seq_d[tax_id][blast_seq_id]
            local_blast.write_filterblast_query(filteredScrape.workdir,
                                                blast_seq_id, seq, fn=tax_id)
            # Remaining sequences form the blast database.
            blast_db = filteredScrape.sp_seq_d[tax_id].keys()[1:]
            for blast_key in blast_db:
                seq = filteredScrape.sp_seq_d[tax_id][blast_key]
                local_blast.write_filterblast_db(filteredScrape.workdir,
                                                 blast_key, seq, fn=tax_id)
            # Run the local blast of the staged sequences and read it back.
            local_blast.run_filter_blast(filteredScrape.workdir, tax_id, tax_id)
            seq_blast_score = local_blast.read_filter_blast(filteredScrape.workdir,
                                                            seq_d, fn)
            if len(seq_blast_score.keys()) < (threshold - count2):
                # Fewer sequences available than needed: all get used, so the
                # expectation drops by the shortfall.
                thres_minus = (threshold - count2) - len(seq_blast_score.keys())
                count = count - thres_minus
    filteredScrape.how_many_sp_to_keep(selectby)
    assert count == len(filteredScrape.filtered_seq) and count > 0
    # (Trailing commented-out dump of example otu_dict entries — Senecio
    # accessions JX895389.1/JX895398.1/JX895353.1 etc. — retained in spirit
    # but summarized; see tests/data/precooked fixtures for the raw records.)
tre.write(path="before2.tre", schema="nexus") data_obj = physcraper.generate_ATT_from_phylesystem(aln=aln, workdir=workdir, config_obj=conf, study_id=study_id, tree_id=tree_id) data_obj.write_files() json.dump(data_obj.otu_dict, open('{}/otu_dict.json'.format(workdir), 'wb')) sys.stdout.write("{} taxa in alignement and tree\n".format(len(data_obj.aln))) ids = physcraper.IdDicts(conf, workdir='treebase') scraper = physcraper.PhyscraperScrape(data_obj, ids) #scraper.read_blast_wrapper() scraper.est_full_tree() '''scraper.run_blast_wrapper() scraper.read_blast_wrapper() scraper.remove_identical_seqs() scraper.write_all_unaligned(filename="combo.fas") json.dump(data_obj.otu_dict, open('treebase/otu_dict2.json', 'wb')) scraper.generate_streamed_alignment()