Exemplo n.º 1
0
def _make_famplex_lookup():
    """Build a lookup from HGNC member gene names to FamPlex IDs.

    Returns
    -------
    dict
        Maps the sorted tuple of names of a FamPlex node's HGNC children
        to the ID of that FamPlex node.
    """
    bio_ontology.initialize()
    lookup = {}
    for node in bio_ontology.nodes:
        node_ns, node_id = bio_ontology.get_ns_id(node)
        if node_ns != 'FPLX':
            continue
        member_names = []
        for child in bio_ontology.get_children(node_ns, node_id):
            if child[0] == 'HGNC':
                member_names.append(bio_ontology.get_name(*child))
        # Sorting makes the key independent of the order in which children
        # are returned. Nodes that produce the same key overwrite each
        # other, so the last node visited wins.
        lookup[tuple(sorted(member_names))] = node_id
    return lookup
Exemplo n.º 2
0
 def _add_node(self, agent, uuid=None):
     """Return the node ID for an agent, creating the node if it is new.

     Nodes are keyed by agent name. If a node with the agent's name was
     already added, only the given UUID is recorded on it (unless it is
     already present) and the existing ID is returned. Otherwise a new
     node is built and appended to the node list.

     Parameters
     ----------
     agent :
         The agent to represent; its ``name`` is used as the node key.
     uuid :
         Identifier to record in the node's ``uuid_list``.

     Returns
     -------
     The ID of the existing or newly created node.
     """
     key = agent.name
     existing_id = self._existing_nodes.get(key)
     if existing_id is not None:
         # The node is already there: just make sure it knows this UUID.
         matches = [node for node in self._nodes
                    if node['data']['id'] == existing_id]
         known_uuids = matches[0]['data']['uuid_list']
         if uuid not in known_uuids:
             known_uuids.append(uuid)
         return existing_id

     db_refs = _get_db_refs(agent)
     new_id = self._get_new_id()
     self._existing_nodes[key] = new_id

     # Only agents with an FPLX entry in their db_refs are expanded into
     # their HGNC children.
     if 'FPLX' in db_refs:
         family_members = bio_ontology.get_children(
             *agent.get_grounding(), ns_filter={'HGNC'})
     else:
         family_members = []

     members = {}
     for member in family_members:
         std_refs = standardize_db_refs({member[0]: member[1]})
         gene_name = bio_ontology.get_name(*member)
         # Keep only the references for which a URL could be produced.
         ref_urls = {}
         for ref_ns, ref_id in std_refs.items():
             url = get_identifiers_url(ref_ns, ref_id)
             if url:
                 ref_urls[ref_ns] = url
         members[gene_name] = {'db_refs': ref_urls}

     self._nodes.append({
         'data': {
             'id': new_id,
             'name': agent.name.replace('_', ' '),
             'db_refs': db_refs,
             'parent': '',
             'members': members,
             'uuid_list': [uuid]
         }
     })
     return new_id
def get_pain_mol():
    """Return the names of the ChEBI children of each pain signaling class.

    Returns
    -------
    dict
        Maps each compound class label to the list of ontology names of
        the children reported for that class's ChEBI ID.
    """
    # NOTE(review): "Brandykinin" is presumably a misspelling of
    # bradykinin; the key is part of the returned dict and is kept as-is.
    pain_signal_mol = {
        "Prostaglandins": "CHEBI:26333",
        "Brandykinin": "CHEBI:3165"
    }

    names_by_compound = {}
    for compound, chebi_id in pain_signal_mol.items():
        child_ids = [
            child[1]
            for child in bio_ontology.get_children('CHEBI', chebi_id)
        ]
        names_by_compound[compound] = [
            bio_ontology.get_name('CHEBI', child_id)
            for child_id in child_ids
        ]
    return names_by_compound
Exemplo n.º 4
0
def get_genes_for_family(family_agent):
    """Return one agent per HGNC child of a FamPlex-grounded agent.

    Parameters
    ----------
    family_agent :
        Agent whose ``db_refs`` may contain an 'FPLX' entry.

    Returns
    -------
    list
        Agents for the HGNC children of the family, sorted by name. Empty
        if the agent has no 'FPLX' grounding.
    """
    from indra.ontology.bio import bio_ontology
    from indra.ontology.standardize \
        import standardize_agent_name

    fplx_id = family_agent.db_refs.get('FPLX')
    if not fplx_id:
        return []

    hgnc_children = [
        child for child in bio_ontology.get_children('FPLX', fplx_id)
        if child[0] == 'HGNC'
    ]
    gene_agents = []
    for _, hgnc_id in hgnc_children:
        gene_agent = Agent(
            None,
            db_refs={'HGNC': hgnc_id, 'TYPE': 'ONT::GENE-PROTEIN'},
        )
        # The agent is created without a name; standardization fills it in
        # from the grounding so it can be used as the sort key below.
        standardize_agent_name(gene_agent, standardize_refs=True)
        gene_agents.append(gene_agent)
    gene_agents.sort(key=lambda gene_agent: gene_agent.name)
    return gene_agents
Exemplo n.º 5
0
    def set_style_expression_mutation(self, model, cell_line='A375_SKIN'):
        """Style every labeled node by expression level and mutation status.

        The fill color encodes the (normalized) mean expression level of
        the node's agent on the given cell line; the stroke color is chosen
        from the agent's mutation status on that cell line.

        Parameters
        ----------
        model: list<indra.statements.Statement>
            A list of INDRA statements
        cell_line: str
            A cell line for which we're interested in protein expression level
        """
        # Pair each glyph label with an agent of the model whose normalized
        # name equals the label; when several agents match, the last wins.
        label_to_agent = {}
        for label in self.label_to_glyph_ids.keys():
            for stmt in model:
                for candidate in stmt.agent_list():
                    if candidate is None:
                        continue
                    if _n(candidate.name) == label:
                        label_to_agent[label] = candidate

        # Mean expression level per agent (None when nothing was measured).
        agent_to_expression_level = {}
        for agent in label_to_agent.values():
            refs = agent.db_refs
            if 'HGNC' not in refs and 'FPLX' not in refs:
                # Not a gene: assigned level 0, which still takes part in
                # the min/max normalization below.
                agent_to_expression_level[agent] = 0
                continue

            if 'FPLX' in refs:
                gene_names = [
                    bio_ontology.get_name(*child) for child in
                    bio_ontology.get_children('FPLX', refs['FPLX'])
                ]
            else:
                gene_names = [agent.name]

            logger.info('Getting expression status of proteins: %s' %
                        str(gene_names))
            expression = self.get_expression(gene_names, cell_line)
            measured = []
            for line_name in expression:
                per_gene = expression[line_name]
                for gene in per_gene:
                    value = per_gene[gene]
                    if value is not None:
                        measured.append(value)
            agent_to_expression_level[agent] = \
                sum(measured) / len(measured) if measured else None

        # Range of the known levels, used to scale scores into [0, 1].
        known_levels = [level for level in agent_to_expression_level.values()
                        if level is not None]
        min_level = min(known_levels) if known_levels else None
        max_level = max(known_levels) if known_levels else None
        # With no known levels the span is never consulted, because every
        # level is None and the check below short-circuits.
        level_span = None if max_level is None else max_level - min_level

        agent_to_score = {}
        for agent, level in agent_to_expression_level.items():
            if level is None or level_span == 0:
                agent_to_score[agent] = 0
            else:
                agent_to_score[agent] = (level - min_level) / level_span

        # Turn scores into fill colors and apply them with the stroke color.
        for agent, score in agent_to_score.items():
            is_gene = 'HGNC' in agent.db_refs or 'FPLX' in agent.db_refs
            if is_gene:
                rgba = cm.Greens(0.6*score + 0.2)
            else:
                rgba = cm.Blues(0.3)
            color_str = colors.to_hex(rgba[:3])
            assert(len(color_str) == 7)
            stroke_color = self._choose_stroke_color_from_mutation_status(
                agent.name, cell_line)
            self.set_style(agent.name, stroke_color, color_str)
Exemplo n.º 6
0
def test_mtorc_children():
    """Check that RICTOR is not a child of mTORC1 and RPTOR not of mTORC2."""
    mtorc1_children = bio_ontology.get_children('FPLX', 'mTORC1')
    mtorc2_children = bio_ontology.get_children('FPLX', 'mTORC2')

    rictor = ('HGNC', hgnc_client.get_hgnc_id('RICTOR'))
    assert rictor not in mtorc1_children

    rptor = ('HGNC', hgnc_client.get_hgnc_id('RPTOR'))
    assert rptor not in mtorc2_children
Exemplo n.º 7
0
import pickle
from collections import Counter
from emmaa.model_tests import StatementCheckingTest
from indra.ontology.bio import bio_ontology
from indra.statements.validate import print_validation_report
from indra.ontology.standardize import standardize_agent_name

# Location of the CTD chemical-disease data; presumably a pickle, judging by
# the extension (it is not loaded in the visible part of this file).
# NOTE(review): hard-coded absolute path to one user's home directory; this
# will not exist on other machines.
CTD_CHEMICAL_DISEASE = '/Users/ben/data/ctd/ctd_chemical_disease.pkl'

# Root grounding as a (namespace, ID) tuple; presumably the MeSH term for
# pain - confirm.
pain = ('MESH', 'D010146')
# The root grounding followed by everything the ontology returns as its
# children. This queries the ontology at import time.
pain_and_children = [pain] + bio_ontology.get_children(*pain)


def filter_objects(stmts, object_groundings):
    """Return the statements whose object carries one of the groundings.

    A statement is kept when at least one ``(namespace, id)`` item of its
    object's ``db_refs`` appears in ``object_groundings``. The number of
    statements before and after filtering is printed.

    Parameters
    ----------
    stmts : list
        Statements exposing ``stmt.obj.db_refs`` as a dict.
    object_groundings : iterable of tuple
        ``(namespace, id)`` pairs to match against. Consumed exactly once,
        so a one-shot iterator is handled correctly.

    Returns
    -------
    list
        The matching statements, in their original order.
    """
    print('Filtering %d statements' % len(stmts))
    # Build the lookup set once, not once per statement: this avoids
    # repeated work and keeps a one-shot iterable from being exhausted
    # after the first statement.
    groundings = set(object_groundings)
    filtered_stmts = [
        stmt for stmt in stmts
        if not groundings.isdisjoint(stmt.obj.db_refs.items())
    ]
    print('Filtered to %d statements' % len(filtered_stmts))
    return filtered_stmts


def get_mappings():
    """Run Gilda grounding on the names of subjects lacking a CHEBI ref.

    NOTE(review): this function appears to be truncated. In the visible
    code ``mappings`` is never filled or returned and ``matches`` is never
    used. ``stmts`` is not defined in this function and is presumably
    expected as a module-level name - confirm against the full source.
    """
    import gilda
    mappings = set()
    for stmt in stmts:
        subj = stmt.subj
        # Subjects that already have a CHEBI grounding are skipped.
        if 'CHEBI' in subj.db_refs:
            continue
        matches = gilda.ground(subj.name)
def expand_with_child_go_terms(terms):
    """Return the given GO terms together with their ontology children.

    Parameters
    ----------
    terms : iterable of str
        GO identifiers to expand.

    Returns
    -------
    set
        The input terms plus the ID of every child that the ontology
        returns for each of them.
    """
    expanded = set(terms)
    for go_id in terms:
        expanded.update(
            child[1] for child in bio_ontology.get_children('GO', go_id)
        )
    return expanded
Exemplo n.º 9
0
                                 for compound, names in PAIN_MOL_NAMES.items()
                                 if rows[2] in names]
    df = pd.DataFrame(celltype_pain_interaction)
    return df


if __name__ == "__main__":
    # Load the headerless, tab-separated SIF table and keep only the
    # 'controls-production-of' rows (the relation is in column 1).
    df = pd.read_csv(PC_SIF_URL, sep='\t', header=None)
    df = df[df[1] == 'controls-production-of']

    # NOTE(review): "Brandykinin" is presumably a misspelling of
    # bradykinin; it is only used as a local label here.
    pain_signal_mol = {
        "Prostaglandins": "CHEBI:26333",
        "Brandykinin": "CHEBI:3165"
    }

    # Child ChEBI IDs per compound class.
    chebi_list = {}
    for compounds, chebi_id in pain_signal_mol.items():
        chebi_list[compounds] = [
            child[1]
            for child in bio_ontology.get_children('CHEBI', chebi_id)
        ]

    # Passing the dict itself to isin() would test column 2 against the
    # dict keys (the compound labels), so flatten the IDs into one set.
    chebi_ids = {
        child_id
        for child_ids in chebi_list.values()
        for child_id in child_ids
    }
    df = df[df[2].isin(chebi_ids)]

    chebi_stmts = [{
        'Enzyme': row[0],
        'Statement': row[1],
        'CHEBI_ID': row[2],
        'CHEBI_Name': bio_ontology.get_name('CHEBI', row[2])
    } for _, row in df.iterrows()]
    # Fixing the columns keeps the header in the output even when no row
    # matched.
    df = pd.DataFrame(
        chebi_stmts,
        columns=['Enzyme', 'Statement', 'CHEBI_ID', 'CHEBI_Name'],
    )
    df.to_csv("enzyme_interactions.tsv", sep="\t", header=True, index=False)