Пример #1
0
def output_parser(outdir, prefix):
    '''Build the list of article objects from tmVar, DNorm and GNormPlus output.

    Every input path is ``outdir + '/' + prefix`` followed by a tool-specific
    suffix (``.tmvarI.*.PubTator``, ``.DNormO.*``, ``.GNormPlusO``).

    The tmVar "tk" file is the only one that creates articles; every later
    pass looks an existing article up by pmid through ``found_article`` and
    attaches a section or an entry to it.

    Args:
        outdir: directory holding the tool outputs (concatenated with ``+``,
            so it has to support that with ``str``).
        prefix: common file-name prefix of the outputs.

    Returns:
        list of ``Base.Article`` objects, in the order the tmVar tk file
        yields them.
    '''
    stem = outdir + '/' + prefix
    tmvar_tk_path = stem + '.tmvarI.tk.PubTator'
    tmvar_abstract_path = stem + '.tmvarI.abstract.PubTator'
    tmvar_results_path = stem + '.tmvarI.results.PubTator'
    dnorm_tk_path = stem + '.DNormO.tk'
    dnorm_abstract_path = stem + '.DNormO.abstract'
    dnorm_results_path = stem + '.DNormO.results'
    gnorm_dir = stem + '.GNormPlusO'

    collected = []

    # tmVar, tk section: one new article per record.
    for pmid, text, mutations in tmvar_parser(tmvar_tk_path):
        article = Base.Article(pmid)
        tk_section = Base.TK(text)
        for mutation in mutations:
            tk_section.add_e(mutation)
        article.add_tk(tk_section)
        collected.append(article)

    # tmVar, abstract section: attached to the article created above.
    for pmid, text, mutations in tmvar_parser(tmvar_abstract_path):
        article = found_article(pmid, collected)
        abstract_section = Base.Abstract(text)
        for mutation in mutations:
            abstract_section.add_e(mutation)
        article.add_abstract(abstract_section)

    # tmVar, results section.
    for pmid, text, mutations in tmvar_parser(tmvar_results_path):
        article = found_article(pmid, collected)
        results_section = Base.Results(text)
        for mutation in mutations:
            results_section.add_e(mutation)
        article.add_results(results_section)

    # DNorm: one file per section, each entry goes to the section of the
    # same name on the matching article.
    dnorm_inputs = (
        (dnorm_tk_path, 'tk'),
        (dnorm_abstract_path, 'abstract'),
        (dnorm_results_path, 'results'),
    )
    for dnorm_path, section_name in dnorm_inputs:
        for pmid, entry in DNorm_parser(dnorm_path):
            article = found_article(pmid, collected)
            getattr(article, section_name).add_e(entry)

    # GNormPlus: a single directory; the target section is chosen from the
    # file name the parser reports ('tk', then 'abstract', otherwise results).
    for pmid, entry, fn in GNormPlus_parser(gnorm_dir):
        article = found_article(pmid, collected)
        if re.search('tk', fn):
            target = article.tk
        elif re.search('abstract', fn):
            target = article.abstract
        else:
            target = article.results
        target.add_e(entry)

    return collected
Пример #2
0
def output_parser(outdir, prefix):
    '''Build the list of article objects from tmVar/EMU, DNorm and GNormPlus output.

    tmVar, DNorm and GNormPlus files are located at ``outdir + '/' + prefix``
    plus a tool-specific suffix; EMU files are located at
    ``outdir + '/EMU_1.19_HUGO_' + prefix`` plus ``.EMU.<section>``.

    The "tk" variant pass is the only one that creates articles; every later
    pass looks an existing article up by pmid through ``found_article`` and
    attaches a section or an entry to it.

    The results inputs are optional: if reading them raises
    ``FileNotFoundError`` a message is printed and the DNorm and GNormPlus
    results files are skipped as well.

    Args:
        outdir: directory holding the tool outputs (concatenated with ``+``,
            so it has to support that with ``str``).
        prefix: common file-name prefix of the outputs.

    Returns:
        list of ``Base.Article`` objects, in the order the tk variant pass
        yields them, with abstract and results entries sorted.
    '''
    tm_tk = outdir + '/' + prefix + '.tmvarI.tk.PubTator'
    tm_ab = outdir + '/' + prefix + '.tmvarI.abstract.PubTator'
    tm_re = outdir + '/' + prefix + '.tmvarI.results.PubTator'
    dn_tk = outdir + '/' + prefix + '.DNormO.tk'
    dn_ab = outdir + '/' + prefix + '.DNormO.abstract'
    dn_re = outdir + '/' + prefix + '.DNormO.results'
    gn_tk = outdir + '/' + prefix + '.GNormPlusO.tk'
    gn_ab = outdir + '/' + prefix + '.GNormPlusO.abstract'
    gn_re = outdir + '/' + prefix + '.GNormPlusO.results'
    emu_tk = outdir + '/EMU_1.19_HUGO_' + prefix + '.EMU.tk'
    emu_ab = outdir + '/EMU_1.19_HUGO_' + prefix + '.EMU.abstract'
    emu_re = outdir + '/EMU_1.19_HUGO_' + prefix + '.EMU.results'
    articles = []
    # tk: one new article per record
    for pmid, string, mutation_entry_ls in var_parser(tm_tk, emu_tk):
        article_obj = Base.Article(pmid)  # create article_obj
        tk_obj = Base.TK(string)  # create tk_obj
        for e in mutation_entry_ls:
            tk_obj.add_e(e)  # add entry
        article_obj.add_tk(tk_obj)
        articles.append(article_obj)
    # abstract
    for pmid, string, mutation_entry_ls in var_parser(tm_ab, emu_ab):
        article_obj = found_article(pmid, articles)  # find article_obj
        abstract_obj = Base.Abstract(string)  # create abstract_obj
        for e in mutation_entry_ls:
            abstract_obj.add_e(e)  # add entry
        article_obj.add_abstract(abstract_obj)
    # results (optional input)
    has_results = True
    try:
        # Pair the tmVar results file with the EMU *results* file; the
        # abstract EMU file belongs to the abstract pass above.
        for pmid, string, mutation_entry_ls in var_parser(tm_re, emu_re):
            article_obj = found_article(pmid, articles)  # find article_obj
            results_obj = Base.Results(string)  # create results_obj
            for e in mutation_entry_ls:
                results_obj.add_e(e)  # add entry
            article_obj.add_results(results_obj)
    except FileNotFoundError as e:
        # No results input: remember it so the other tools' results files
        # are not read either.
        has_results = False
        print('no results: {0}'.format(e))
    # DNorm
    for pmid, entry in DNorm_parser(dn_tk):
        article_obj = found_article(pmid, articles)
        article_obj.tk.add_e(entry)
    for pmid, entry in DNorm_parser(dn_ab):
        article_obj = found_article(pmid, articles)
        article_obj.abstract.add_e(entry)
    if has_results:
        for pmid, entry in DNorm_parser(dn_re):
            article_obj = found_article(pmid, articles)
            article_obj.results.add_e(entry)
    # GNormPlus
    for pmid, entry in GNormPlus_parser(gn_tk):
        article_obj = found_article(pmid, articles)
        article_obj.tk.add_e(entry)
    for pmid, entry in GNormPlus_parser(gn_ab):
        article_obj = found_article(pmid, articles)
        article_obj.abstract.add_e(entry)
    if has_results:
        for pmid, entry in GNormPlus_parser(gn_re):
            article_obj = found_article(pmid, articles)
            article_obj.results.add_e(entry)
    # sort entries (only the abstract and results sections are sorted here)
    for i in articles:
        if i.abstract:  # possible no abstract
            i.abstract.sort_entry()
        if i.results:  # possible no results
            i.results.sort_entry()
    return articles