def output_parser(outdir, prefix):
    '''Build the list of article objects from one run's annotation output.

    Input files are located under ``outdir`` using ``prefix``:

    * ``<prefix>.tmvarI.{tk,abstract,results}.PubTator`` read by
      ``tmvar_parser``;
    * ``<prefix>.DNormO.{tk,abstract,results}`` read by ``DNorm_parser``;
    * ``<prefix>.GNormPlusO`` handed to ``GNormPlus_parser``.

    outdir: directory holding the output files (string, joined with '/').
    prefix: file-name prefix shared by all output files of the run.

    output: article obj list
    '''
    stem = outdir + '/' + prefix
    tmvar_tk_path = stem + '.tmvarI.tk.PubTator'
    tmvar_abstract_path = stem + '.tmvarI.abstract.PubTator'
    tmvar_results_path = stem + '.tmvarI.results.PubTator'
    dnorm_tk_path = stem + '.DNormO.tk'
    dnorm_abstract_path = stem + '.DNormO.abstract'
    dnorm_results_path = stem + '.DNormO.results'
    gnorm_dir = stem + '.GNormPlusO'

    articles = []

    # The tk file is the only place where Article objects are created;
    # every later pass looks an existing article up by pmid.
    for pmid, text, mutations in tmvar_parser(tmvar_tk_path):
        article = Base.Article(pmid)
        tk_section = Base.TK(text)
        for mutation in mutations:
            tk_section.add_e(mutation)
        article.add_tk(tk_section)
        articles.append(article)

    # abstract section, with its mutation entries
    for pmid, text, mutations in tmvar_parser(tmvar_abstract_path):
        article = found_article(pmid, articles)
        abstract_section = Base.Abstract(text)
        for mutation in mutations:
            abstract_section.add_e(mutation)
        article.add_abstract(abstract_section)

    # results section, with its mutation entries
    for pmid, text, mutations in tmvar_parser(tmvar_results_path):
        article = found_article(pmid, articles)
        results_section = Base.Results(text)
        for mutation in mutations:
            results_section.add_e(mutation)
        article.add_results(results_section)

    # DNorm: one file per section, entries go to the same-named attribute.
    dnorm_sources = (
        (dnorm_tk_path, 'tk'),
        (dnorm_abstract_path, 'abstract'),
        (dnorm_results_path, 'results'),
    )
    for path, section_name in dnorm_sources:
        for pmid, entry in DNorm_parser(path):
            article = found_article(pmid, articles)
            getattr(article, section_name).add_e(entry)

    # GNormPlus: a single directory; the file name reported by the parser
    # decides the section ('tk' is tested first, then 'abstract', anything
    # else is treated as results).
    for pmid, entry, fn in GNormPlus_parser(gnorm_dir):
        article = found_article(pmid, articles)
        if re.search('tk', fn):
            section = article.tk
        elif re.search('abstract', fn):
            section = article.abstract
        else:
            section = article.results
        section.add_e(entry)

    return articles
def output_parser(outdir, prefix):
    '''Build the list of article objects from one run's annotation output.

    Input files are located under ``outdir`` using ``prefix``:

    * ``<prefix>.tmvarI.{tk,abstract,results}.PubTator`` and
      ``EMU_1.19_HUGO_<prefix>.EMU.{tk,abstract,results}``, read pairwise
      by ``var_parser``;
    * ``<prefix>.DNormO.{tk,abstract,results}`` read by ``DNorm_parser``;
    * ``<prefix>.GNormPlusO.{tk,abstract,results}`` read by
      ``GNormPlus_parser``.

    The results files are optional: if reading them raises
    ``FileNotFoundError`` a message is printed and the DNorm / GNormPlus
    results files are skipped as well.

    outdir: directory holding the output files (string, joined with '/').
    prefix: file-name prefix shared by all output files of the run.

    output: article obj list
    '''
    stem = outdir + '/' + prefix
    emu_stem = outdir + '/EMU_1.19_HUGO_' + prefix

    tm_tk = stem + '.tmvarI.tk.PubTator'
    tm_ab = stem + '.tmvarI.abstract.PubTator'
    tm_re = stem + '.tmvarI.results.PubTator'
    dn_tk = stem + '.DNormO.tk'
    dn_ab = stem + '.DNormO.abstract'
    dn_re = stem + '.DNormO.results'
    gn_tk = stem + '.GNormPlusO.tk'
    gn_ab = stem + '.GNormPlusO.abstract'
    gn_re = stem + '.GNormPlusO.results'
    emu_tk = emu_stem + '.EMU.tk'
    emu_ab = emu_stem + '.EMU.abstract'
    emu_re = emu_stem + '.EMU.results'

    articles = []

    # The tk files are the only place where Article objects are created;
    # every later pass looks an existing article up by pmid.
    for pmid, string, mutation_entry_ls in var_parser(tm_tk, emu_tk):
        article_obj = Base.Article(pmid)
        tk_obj = Base.TK(string)
        for entry in mutation_entry_ls:
            tk_obj.add_e(entry)
        article_obj.add_tk(tk_obj)
        articles.append(article_obj)

    # abstract
    for pmid, string, mutation_entry_ls in var_parser(tm_ab, emu_ab):
        article_obj = found_article(pmid, articles)
        abstract_obj = Base.Abstract(string)
        for entry in mutation_entry_ls:
            abstract_obj.add_e(entry)
        article_obj.add_abstract(abstract_obj)

    # results (optional)
    has_results = True
    try:
        # Fix: pair the tmVar results file with the EMU *results* file.
        # The previous code passed emu_ab here and left emu_re unused.
        for pmid, string, mutation_entry_ls in var_parser(tm_re, emu_re):
            article_obj = found_article(pmid, articles)
            results_obj = Base.Results(string)
            for entry in mutation_entry_ls:
                results_obj.add_e(entry)
            article_obj.add_results(results_obj)
    except FileNotFoundError as e:
        has_results = False
        print('no results: {0}'.format(e))

    # DNorm first, then GNormPlus: append their entries to the sections
    # created above. Results files are only read when the results section
    # was successfully built.
    for parser, tk_path, ab_path, results_path in (
        (DNorm_parser, dn_tk, dn_ab, dn_re),
        (GNormPlus_parser, gn_tk, gn_ab, gn_re),
    ):
        for pmid, entry in parser(tk_path):
            article_obj = found_article(pmid, articles)
            article_obj.tk.add_e(entry)
        for pmid, entry in parser(ab_path):
            article_obj = found_article(pmid, articles)
            article_obj.abstract.add_e(entry)
        if has_results:
            for pmid, entry in parser(results_path):
                article_obj = found_article(pmid, articles)
                article_obj.results.add_e(entry)

    # sort entry
    # NOTE(review): only abstract and results are sorted; tk entries are
    # left in insertion order, as in the original code - confirm intended.
    for article_obj in articles:
        if article_obj.abstract:  # possible no abstract
            article_obj.abstract.sort_entry()
        if article_obj.results:  # possible no results
            article_obj.results.sort_entry()

    return articles