예제 #1
0
def build_prior(gene_names):
    """Collect prior Statements from BEL, Pathway Commons and Phosphosite.

    The Statements obtained from each source are dumped to their own
    prefixed pickle file ('bel', 'biopax' and 'phosphosite'). Nothing is
    returned.

    Parameters
    ----------
    gene_names
        Passed through to GeneNetwork (together with the module-level
        basen) to scope the BEL and Pathway Commons queries.
    """
    network = GeneNetwork(gene_names, basen)

    # BEL Statements, requested with filter=False
    ac.dump_statements(network.get_bel_stmts(filter=False),
                       prefixed_pkl('bel'))

    # Pathway Commons Statements, restricted to a fixed set of databases
    pc_stmts = network.get_biopax_stmts(
        database_filter=['reactome', 'kegg', 'pid'])

    def _is_blacklisted(stmt):
        # A Statement is blacklisted as soon as one of its evidences has a
        # source ID listed in biopax_blacklist
        evidence_ids = {ev.source_id for ev in stmt.evidence}
        return bool(evidence_ids & set(biopax_blacklist))

    pc_stmts = [stmt for stmt in pc_stmts if not _is_blacklisted(stmt)]
    ac.dump_statements(pc_stmts, prefixed_pkl('biopax'))

    # Phosphosite Statements read from the OWL file
    ac.dump_statements(read_phosphosite_owl(phosphosite_owl_file),
                       prefixed_pkl('phosphosite'))
예제 #2
0
        path_str = ''
        for ix, (node, sign) in enumerate(path):
            if ix == 0:
                path_str += node
            else:
                if sign == last_sign:
                    path_str += ' -> %s' % node
                else:
                    path_str += ' -| %s' % node
            last_sign = sign
        print('%s : score %s' % (path_str, score))


if __name__ == '__main__':
    # Run run_task1.py before running this one
    # Load the Statements stored under the 'pysb_stmts' prefix
    stmts_path = prefixed_pkl('pysb_stmts')
    with open(stmts_path, 'rb') as fh:
        stmts = pickle.load(fh)
    # Load the pickled (scored_paths, model) pair
    with open('scored_paths.pkl', 'rb') as fh:
        scored_paths, model = pickle.load(fh)

    # Merge the groups and the per-group path details obtained for every
    # entry of the nested scored_paths dict
    all_groups = set()
    all_path_details = {}
    for cell_line, drug_dict in scored_paths.items():
        for drug, paths in drug_dict.items():
            groups, path_details = group_scored_paths(paths, model, stmts)
            for pg, path_list in path_details.items():
                if pg not in all_path_details:
                    # First time this group is seen: adopt its details as is
                    all_path_details[pg] = path_list
                else:
                    all_path_details[pg] |= path_list
            all_groups |= groups
예제 #3
0
def assemble_pysb(stmts, data_genes, contextualize=False):
    """Filter Statements, assemble them into PySB models and pickle them.

    The filtered Statements are dumped under the 'pysb_stmts' prefix and
    the generic model assembled from them (plus the drug target
    Statements) is pickled under the 'pysb_model' prefix. Nothing is
    returned.

    Parameters
    ----------
    stmts
        The INDRA Statements to filter and assemble.
    data_genes
        Gene list handed to ac.filter_gene_list and to the
        contextualization helpers.
    contextualize : bool
        If True, additionally dump Statements and a model for each entry of
        the module-level cell_lines. Default: False
    """
    # Filter the INDRA Statements to be put into the model
    filtered = ac.filter_by_type(stmts, Complex, invert=True)
    filtered = ac.filter_direct(filtered)
    filtered = ac.filter_belief(filtered, 0.95)
    filtered = ac.filter_top_level(filtered)
    # Strip the extraneous supports/supported by here
    strip_supports(filtered)
    filtered = ac.filter_gene_list(filtered, data_genes, 'all')
    for single_arg_filter in (ac.filter_enzyme_kinase,
                              ac.filter_mod_nokinase,
                              ac.filter_transcription_factor):
        filtered = single_arg_filter(filtered)

    # Simplify activity types
    linker = MechLinker(filtered)
    linker.gather_explicit_activities()
    linker.reduce_activities()
    linker.gather_modifications()
    linker.reduce_modifications()
    normalized = normalize_active_forms(linker.statements)

    # Replace activations when possible
    linker = MechLinker(normalized)
    linker.gather_explicit_activities()
    linker.replace_activations()
    # Require active forms
    linker.require_active_forms()

    # Keep removing inconsequential PTMs and activities until a pass no
    # longer shrinks the list of Statements
    prev_count = len(linker.statements)
    while True:
        linker.statements = ac.filter_inconsequential_mods(
            linker.statements, get_mod_whitelist())
        linker.statements = ac.filter_inconsequential_acts(
            linker.statements, get_mod_whitelist())
        cur_count = len(linker.statements)
        if cur_count >= prev_count:
            break
        prev_count = cur_count
    model_stmts = linker.statements

    # Save the Statements here, before the drug target Statements are added
    ac.dump_statements(model_stmts, prefixed_pkl('pysb_stmts'))

    # Add drug target Statements
    model_stmts += get_drug_target_statements()

    # Just generate the generic model
    assembler = PysbAssembler()
    assembler.add_statements(model_stmts)
    generic_model = assembler.make_model()
    with open(prefixed_pkl('pysb_model'), 'wb') as fh:
        pickle.dump(generic_model, fh)

    # Run this extra part only if contextualize is set to True
    if not contextualize:
        return

    cell_lines_no_data = ['COLO858', 'K2', 'MMACSF', 'MZ7MEL', 'WM1552C']
    for cell_line in cell_lines:
        # Cell lines in the no-data list get the generic Statements and an
        # uncontextualized model
        has_data = cell_line not in cell_lines_no_data
        if has_data:
            line_stmts = contextualize_stmts(model_stmts, cell_line,
                                             data_genes)
        else:
            line_stmts = model_stmts
        assembler = PysbAssembler()
        assembler.add_statements(line_stmts)
        line_model = assembler.make_model()
        if has_data:
            contextualize_model(line_model, cell_line, data_genes)
        ac.dump_statements(line_stmts,
                           prefixed_pkl('pysb_stmts_%s' % cell_line))
        with open(prefixed_pkl('pysb_model_%s' % cell_line), 'wb') as fh:
            pickle.dump(line_model, fh)
예제 #4
0
            stmts_by_ag[stmt.agent.name].append(stmt)
        except KeyError:
            stmts_by_ag[stmt.agent.name] = [stmt]
    unique_by_ag = {}
    for k, v in stmts_by_ag.items():
        for st in v:
            found = False
            try:
                uniques = unique_by_ag[k]
            except KeyError:
                unique_by_ag[k] = []
                uniques = []
            for stmt in uniques:
                if stmt.equals(st):
                    found = True
                    break
            if not found:
                unique_by_ag[k].append(st)
    new_stmts = []
    for k, v in unique_by_ag.items():
        new_stmts += v
    return new_stmts


if __name__ == '__main__':
    # Gather the Statements read from every active forms file into a single
    # list; extending (rather than reassigning) keeps the Statements of all
    # files instead of only those of the last one
    stmts = []
    for aff in active_forms_files:
        stmts.extend(read_stmts(aff))
    # Pickle the combined Statements under the 'r3' prefix
    with open(prefixed_pkl('r3'), 'wb') as fh:
        pickle.dump(stmts, fh)
예제 #5
0
    fnames = glob.glob(os.path.join(base_dir, '*.ekb'))
    return fnames


def get_file_stmts(fname):
    """Return the Statements extracted from one file by trips.process_xml.

    The file is read as text and its whole content is handed to
    trips.process_xml. An empty list is returned when no processor comes
    back (i.e. process_xml returns None).
    """
    with open(fname, 'rt') as xml_file:
        processor = trips.process_xml(xml_file.read())
    return [] if processor is None else processor.statements


def read_stmts(folder):
    """Return the Statements of all files listed by get_file_names(folder).

    For each file, a zero-based progress counter, the file name and an
    underline of matching length are printed before the file is processed
    with get_file_stmts.
    """
    fnames = get_file_names(folder)
    total = len(fnames)
    collected = []
    for idx, fname in enumerate(fnames):
        print('%d/%d' % (idx, total))
        print(fname)
        print('=' * len(fname))
        collected.extend(get_file_stmts(fname))
    return collected


if __name__ == '__main__':
    # Read the Statements of every file found under base_folder
    trips_stmts = read_stmts(base_folder)
    num_stmts = len(trips_stmts)
    print('Collected %d Statements from TRIPS' % num_stmts)
    # Pickle the collected Statements under the 'trips' prefix
    pkl_path = prefixed_pkl('trips')
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(trips_stmts, pkl_file)