def test_filter_by_curation():
    """Exercise ac.filter_by_curation under both curation policies.

    Builds a small corpus of three statements, attaches curations (keyed by
    pa_hash / source_hash, mimicking DB curation records), and checks:
      * 'any' policy: one incorrect curation drops the whole statement;
      * 'all' policy: a statement is dropped only when every evidence is
        curated incorrect;
      * a 'correct' curation cancels an incorrect one on the same evidence;
      * update_belief=True promotes belief to 1 for correctly-curated
        statements.

    NOTE(review): the asserts after the update_belief=False call rely on
    filter_by_curation mutating new_st1.evidence IN PLACE — statement order
    in this test is load-bearing. st1/st2/st3 are module-level fixtures not
    visible in this chunk.
    """
    # Copy the fixture so the in-place evidence mutation below does not
    # leak into other tests; give it a second, distinct evidence.
    new_st1 = deepcopy(st1)
    new_ev = Evidence(text='a -> b', source_api='new')
    new_st1.evidence.append(new_ev)
    stmts_in = [new_st1, st2, st3]
    assert len(new_st1.evidence) == 2
    assert all(st.belief != 1 for st in stmts_in)
    # Minimal stand-in for a DB curation record.
    Curation = namedtuple('Curation', ['pa_hash', 'source_hash', 'tag'])
    # cur1/cur2: incorrect tags on evidence 0 and 1 of new_st1;
    # cur3: correct tag on evidence 0 (cancels cur1); cur4: correct on st2.
    cur1 = Curation(new_st1.get_hash(),
                    new_st1.evidence[0].get_source_hash(), 'grounding')
    cur2 = Curation(new_st1.get_hash(),
                    new_st1.evidence[1].get_source_hash(), 'wrong_relation')
    cur3 = Curation(new_st1.get_hash(),
                    new_st1.evidence[0].get_source_hash(), 'correct')
    cur4 = Curation(st2.get_hash(),
                    st2.evidence[0].get_source_hash(), 'correct')
    # With 'any' policy it is enough to have one incorrect curation
    any_incorrect_one_cur = ac.filter_by_curation(stmts_in, [cur1], 'any')
    assert len(any_incorrect_one_cur) == 2
    assert new_st1 not in any_incorrect_one_cur
    # With 'all' policy all evidences have to be curated
    all_incorrect_one_cur = ac.filter_by_curation(stmts_in, [cur1], 'all')
    assert len(all_incorrect_one_cur) == 3, len(all_incorrect_one_cur)
    assert new_st1 in all_incorrect_one_cur
    all_incorrect_two_cur = ac.filter_by_curation(
        stmts_in, [cur1, cur2], 'all')
    assert len(all_incorrect_two_cur) == 2
    assert new_st1 not in all_incorrect_two_cur
    # Correct curation cancels out incorrect
    assert len(new_st1.evidence) == 2
    correct_incorrect = ac.filter_by_curation(
        stmts_in, [cur1, cur2, cur3, cur4], 'all', update_belief=False)
    assert len(correct_incorrect) == 3, len(correct_incorrect)
    assert new_st1 in correct_incorrect
    # new_st1.evidence[1] should be filtered out because there's only incorrect
    # curation(cur2), new_st1.evidence[0] stays because correct cancels out
    # incorrect (cur1, cur3)
    assert len(new_st1.evidence) == 1
    assert new_st1.evidence[0].source_api == 'assertion'
    assert all(st.belief != 1 for st in correct_incorrect)
    # Optionally update belief to 1 for correct curation
    new_belief = ac.filter_by_curation(
        stmts_in, [cur1, cur2, cur3, cur4], 'all', update_belief=True)
    assert new_belief[0].belief == 1
    assert new_belief[1].belief == 1
    # Third statement has no 'correct' curation, so belief is unchanged.
    assert new_belief[2].belief == 0.7
def get_statements(target):
    """Fetch DB statements for *target* and run the standard cleanup.

    Pipeline: fetch from DB, drop misgrounded agents, preassemble,
    apply DB curations, then filter negative results.

    NOTE: the TAS source is currently disabled — only DB statements
    are retrieved here.
    """
    stmts = get_db_stmts(target)
    stmts = filter_misgrounding(target, stmts)
    stmts = ac.run_preassembly(stmts)
    stmts = ac.filter_by_curation(stmts, db_curations)
    return filter_neg(stmts)
def _filter_stmts(self, stmts):
    """This is an internal function that is applied to filter statements.

    In general, this does nothing, but some sub classes may want to limit
    the statements that are presented. This is applied to both the complete
    statements list (retrieved by `get_statements`) and the sample (gotten
    through `get_sample`).

    This override filters out statements/evidence flagged by curations.

    NOTE(review): both `filter_by_curation` and `curs` are free names not
    defined in this chunk — presumably a module-level import and a
    module-level curation list; confirm they exist at the call site.
    """
    stmts = filter_by_curation(stmts, curations=curs)
    return stmts
def get_statements(target):
    """Retrieve and assemble statements for *target* with count summaries.

    Combines TAS and DB statements, removes misgroundings, preassembles,
    and applies DB curations.

    Returns
    -------
    tuple
        (stmts, ev_counts, source_counts) where ev_counts maps statement
        hash -> number of evidences and source_counts maps statement
        hash -> per-source evidence counts.
    """
    tas_stmts = get_tas_stmts(target)
    db_stmts = get_db_stmts(target)
    stmts = filter_misgrounding(target, tas_stmts + db_stmts)
    stmts = ac.run_preassembly(stmts)
    stmts = ac.filter_by_curation(stmts, db_curations)
    ev_counts = {}
    source_counts = {}
    # Single pass: record evidence totals and per-source tallies together.
    for stmt in stmts:
        stmt_hash = stmt.get_hash()
        ev_counts[stmt_hash] = len(stmt.evidence)
        per_source = get_source_counts_dict()
        for ev in stmt.evidence:
            per_source[ev.source_api] += 1
        source_counts[stmt_hash] = per_source
    return stmts, ev_counts, source_counts
def assemble_statements(kinase, stmts, curs):
    """Run assembly steps on statements.

    Parameters
    ----------
    kinase : str
        Key used to look up CTD statements (presumably a kinase gene
        name — confirm against `ctd_stmts_by_gene`) and to name the
        output pickle.
    stmts : list
        INDRA Statements to assemble.
    curs : list
        Curations passed to `ac.filter_by_curation`.

    Returns
    -------
    list
        The assembled statements; also pickled to data/assembled/<kinase>.pkl.
    """
    # Remove unary statements and ones with many agents
    stmts = [stmt for stmt in stmts
             if (1 < len(stmt.real_agent_list()) < 4)]
    stmts = replace_ctd(stmts, ctd_stmts_by_gene.get(kinase, []))
    # We do this at this point to make sure we capture the original DB
    # hashes before modifying statements to allow lookup
    for stmt in stmts:
        # Hash is invariant across the statement's evidences: compute once.
        prior_hash = stmt.get_hash()
        for ev in stmt.evidence:
            ev.annotations['prior_hash'] = prior_hash
    stmts = fix_invalidities(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_by_curation(stmts, curations=curs)
    stmts = unify_lspci(stmts)
    stmts = remove_contradictions(stmts)
    # Rename chemicals
    logger.info('Renaming chemicals')
    for stmt in stmts:
        for agent in stmt.real_agent_list():
            if agent.db_refs.get('CHEBI') and len(agent.name) > 25:
                rename_chemical(agent)
    # Remove long names
    logger.info('Removing statements with long names')
    stmts = [
        stmt for stmt in stmts
        if all(len(a.name) < 20 for a in stmt.real_agent_list())
    ]
    # Lazy %-style args: formatting is deferred until the record is emitted.
    logger.info('%d statements remaining', len(stmts))
    # Remove microRNAs
    logger.info('Removing microRNA statements')
    stmts = [
        stmt for stmt in stmts
        if not any('miR' in a.name for a in stmt.real_agent_list())
    ]
    logger.info('%d statements remaining', len(stmts))
    stmts = add_source_urls(stmts)
    with open('data/assembled/%s.pkl' % kinase, 'wb') as fh:
        pickle.dump(stmts, fh)
    return stmts
def filter_incorrect_curations(stmts):
    """Return *stmts* with statements/evidence flagged incorrect by the
    module-level DB curations removed."""
    return ac.filter_by_curation(stmts, curations=db_curations)
# NOTE(review): fragment — this code references names (subj, obj, gilda_subj,
# stmts, file, subj_set, obj_set, normalized_df) that are bound earlier,
# presumably inside an enclosing per-file loop; confirm against full source.
# Ground the object string with Gilda; fall back to 'NA' when no match.
gilda_obj = gilda.ground(obj)
gilda_obj = gilda_obj[0].term.entry_name if gilda_obj else 'NA'
# Record the raw and normalized subject/object pair for the CSV report.
normalized_df.append({
    'Subject': subj,
    'Normalized subject': gilda_subj,
    'Object': obj,
    'Normalized object': gilda_obj
})
# Downloading statements using INDRA REST API
idrp = idr.get_statements(subject=gilda_subj, object=gilda_obj)
stmts = stmts + idrp.statements
# Filtering out the indirect INDRA statements
#indra_stmts = ac.filter_direct(stmts)
indra_stmts = ac.run_preassembly(stmts, run_refinement=False)
# Drop statements flagged incorrect by DB curations, then restrict complexes
# to the subject/object sets and re-deduplicate.
indra_filtered = ac.filter_by_curation(indra_stmts,
                                       curations=db_curations)
indra_op_filtered = filter_complex_statements(indra_filtered,
                                              subj_set, obj_set)
indra_op_filtered = ac.run_preassembly(indra_op_filtered,
                                       run_refinement=False)
# Emit an HTML report of the assembled statements.
html_assembler(indra_op_filtered,
               os.path.join(OUTPUT, file + '_indra_report.html'))
# normalized_df was accumulated as a list of dicts; convert to a DataFrame
# (the name is rebound here) and write the normalization report.
normalized_df = pd.DataFrame(normalized_df)
normalized_df.to_csv(
    os.path.join(INPUT, file, file + '_normalized_names.csv'))
indra_db_stmts = list(stmts_by_hash.values()) # Filtering out the indirect INDRA statements indra_db_stmts = ac.filter_direct(indra_db_stmts) # Fetch omnipath database biomolecular interactions and # process them into INDRA statements op = process_from_web() # Filter statements which are not ligands/receptors from # OmniPath database op_filtered = filter_op_stmts(op.statements, full_ligand_set, receptor_genes_go) op_filtered = ac.filter_direct(op_filtered) op_filtered = ac.filter_by_curation(op_filtered, curations=db_curations) # Merge omnipath/INDRA statements and run assembly indra_op_stmts = ac.run_preassembly(indra_db_stmts + op_filtered, run_refinement=False) # Filter incorrect curations indra_op_filtered = filter_incorrect_curations(indra_op_stmts) # Filter complex statements indra_op_filtered = filter_complex_statements(indra_op_filtered, full_ligand_set, receptor_genes_go) # We do this again because when removing complex members, we # end up with more duplicates indra_op_filtered = ac.run_preassembly(indra_op_filtered,
# Load the shared grounding map used for regrounding statements.
with open('../../grounding_map.json', 'r') as fh:
    grounding_map = json.load(fh)

#####################
# Querying for and assembling statements
all_stmts = []
for db_ns, db_id, name in groundings:
    if db_id in black_list:
        print('Skipping %s in black list' % name)
        continue
    print('Looking up %s' % name)
    db_stmts = get_db_stmts_by_grounding(db_ns, db_id)
    # TAS only covers human genes, so query it for HGNC groundings only.
    tas_stmts = get_tas_stmts(db_ns, db_id) if db_ns == 'HGNC' else []
    stmts = db_stmts + tas_stmts
    # BUG FIX: the filtered result was previously assigned to a misspelled
    # name ('smts') and silently discarded, so curations were never applied.
    stmts = ac.filter_by_curation(stmts, db_curations)
    stmts = reground_stmts(stmts, grounding_map, misgrounding_map)
    all_stmts += stmts
all_stmts = make_unique_hashes(all_stmts)
all_stmts = ac.run_preassembly(all_stmts)

########################################
# Dump results
with open('disease_map_indra_stmts_full.pkl', 'wb') as fh:
    pickle.dump(all_stmts, fh)
stmts_to_json_file(all_stmts, 'disease_map_indra_stmts_full.json')
filtered_stmts = filter_prior_all(all_stmts, groundings)
with open('disease_map_indra_stmts_filtered.pkl', 'wb') as fh:
    pickle.dump(filtered_stmts, fh)