Exemplo n.º 1
0
    def get_gene_ontology_access(self):
        """
        Builds the UNIPROT <-> GO term mappings for every uniprot id in self.InitSet

        For each uniprot node, the nodes reached through "is_go_annotation" whose Namespace is
        in self.go_namespace_filter are recorded in self.UP2GO_Dict, self.GO2UP and
        self.SeedSet. Uniprots for which no such node was retained are added to
        self.UPs_without_GO instead. self.UP_Names is filled for every uniprot.
        """
        unannotated_count = 0
        log.info('Starting GO matrix mapping starting from %s uniprots', len(self.InitSet))

        for up_id in self.InitSet:
            uniprot_node = DatabaseGraph.UNIPORT.get(up_id)
            # NOTE(review): nothing guards against the lookup above not returning a usable
            # node; the attribute accesses below would then fail.
            self.UP_Names[up_id] = [uniprot_node.ID, uniprot_node.displayName]

            retained_go_ids = []
            # A falsy result from bothV means there is nothing to iterate over
            for annotation in uniprot_node.bothV("is_go_annotation") or ():
                if annotation.Namespace not in self.go_namespace_filter:
                    continue
                go_id = get_bulbs_id(annotation)
                retained_go_ids.append(go_id)
                self.GO2UP[go_id].append(up_id)
                self.SeedSet.add(go_id)

            if retained_go_ids:
                self.UP2GO_Dict[up_id] = list(retained_go_ids)
                continue

            unannotated_count += 1
            log.debug("UP without GO was found. UP bulbs_id: %s, \t name: %s",
                      up_id, self.UP_Names[up_id])
            self.UPs_without_GO.add(up_id)

        log.info('total number of UPs without a go_node annotation: %s out of %s',
                 unannotated_count, len(self.InitSet))
Exemplo n.º 2
0
    def compute_uniprot_attachments(self):
        """
        Fills self.Uniprot_attachments: for every id in self.reached_uniprots_bulbs_id_list,
        stores the bulbs ids of the nodes linked to that uniprot node through "is_same"
        (presumably the Reactome proteins matching it)
        """
        log.info('attaching reactome proteins to uniprot nodes')
        attached_uniprots = 0
        attached_reactome_proteins = 0

        for up_id in self.reached_uniprots_bulbs_id_list:
            same_entity_nodes = DatabaseGraph.UNIPORT.get(up_id).bothV("is_same")

            if same_entity_nodes is None:
                log.debug('No attachment for the node %s', up_id)
                continue

            # Register the (initially empty) list first, then fill it in place
            aliases = []
            self.Uniprot_attachments[up_id] = aliases
            aliases.extend(get_bulbs_id(alias_node) for alias_node in same_entity_nodes)

            attached_uniprots += 1
            attached_reactome_proteins += len(aliases)
            log.debug('attached %s Reactome proteins to the node %s',
                      len(aliases), up_id)

        log.info('Attached %s reactome protein nodes to %s / %s uniprot nodes',
                 attached_reactome_proteins,
                 attached_uniprots, len(self.reached_uniprots_bulbs_id_list))
Exemplo n.º 3
0
def insert_meta_objects(bulbs_graph_class, meta_id_2_property_dict):
    """
    Creates one node of the given bulbs class per Meta-Object (i.e. any physical entity or
    collection thereof), memoizes it, and attaches its annotations, localization link and
    modification features

    :param bulbs_graph_class: bulbs proxy whose `create` method builds each node
    :param meta_id_2_property_dict: maps each meta-object ID to its property dict; the keys
     read here are 'displayName', 'cellularLocation', 'references' and, when present,
     'modification'
    """
    element_type = bulbs_graph_class.element_class.get_element_type(
        bulbs_graph_class.client.config)
    total_properties = len(meta_id_2_property_dict)

    for index, (meta_name, properties) in enumerate(
            meta_id_2_property_dict.iteritems()):
        # Progress report, emitted only for some of the iterations
        if index * 20 % total_properties < 21:
            log.info('insert %s: %s out of %s', element_type, index,
                     total_properties)

        # NOTE(review): 'cellularLocation' is read unconditionally here although its
        # presence is tested further down.
        primary = bulbs_graph_class.create(
            ID=meta_name,
            displayName=properties['displayName'],
            localization=properties['cellularLocation'],
            main_connex=False)

        if meta_name in Leg_ID_Filter:
            ForbiddenIDs.append(get_bulbs_id(primary))

        memoization_dict[meta_name] = primary
        insert_minimal_annotations(primary, properties['references'])

        if 'cellularLocation' in properties:
            # The location node is expected to be memoized already
            secondary = memoization_dict[properties['cellularLocation']]
            DatabaseGraph.is_localized.create(primary,
                                              secondary,
                                              costum_from=primary.ID,
                                              costum_to=secondary.ID)

        if 'modification' not in properties:
            continue

        for modification in properties['modification']:
            # Only modifications carrying both a location and a name are inserted
            if 'location' not in modification or 'modification' not in modification:
                continue

            located_modification = DatabaseGraph.ModificationFeature.create(
                ID=modification['ID'],
                type="post-translational_Mod",
                location=modification['location'],
                displayName=modification['modification'])

            DatabaseGraph.is_able_to_modify.create(
                primary,
                located_modification,
                costum_from=primary.ID,
                costum_to=located_modification.ID)
Exemplo n.º 4
0
def pull_up_acc_nums_from_reactome():
    """
    Attempts to retrieve accession numbers from the neo4j database

    :return: dict that maps each accession number (as str) to the list of vertices adjacent
     to its annotation node (presumably the Reactome proteins it points to)
    :raise Exception: if the lookup of existing UniProt acnum annotation nodes returns None,
     or if fewer than 10 distinct accession numbers were found (Reactome wasn't imported yet)
    """
    not_loaded_message = (
        "Reactome was not loaded or contains no acc_num cross-references to Uniprot")

    acc_num_annot_nodes = DatabaseGraph.AnnotNode.index.lookup(ptype='UniProt')
    if acc_num_annot_nodes is None:
        raise Exception(not_loaded_message)

    acc_num_dict = {}  # acnum to AnnotNode
    for annotation_node in acc_num_annot_nodes:
        if annotation_node is not None:
            annot_obj = DatabaseGraph.vertices.get(
                get_bulbs_id(annotation_node))
            acc_num_dict[str(annot_obj.payload)] = annot_obj

    # Fewer than 10 accession numbers is treated as "not loaded"
    if len(acc_num_dict) < 10:
        raise Exception(not_loaded_message)

    reactome_proteins = {}
    for acc_num, annot_obj in acc_num_dict.items():
        linked_vertices = annot_obj.bothV()
        if linked_vertices is None:
            proteins = []
        else:
            proteins = [vertex for vertex in linked_vertices if vertex is not None]
        reactome_proteins[acc_num] = proteins

        # Report only the accession numbers that do not map to exactly one vertex.
        # The previous check compared the list itself to 1 (always true) and logged the
        # size of the whole result dict instead of this accession number's count.
        if len(proteins) != 1:
            log.debug(
                'Cross-linking reactome v.s. acc_num %s mapped to %s proteins',
                acc_num, len(proteins))

    log.info('Cross-linked %s proteins from reactome v.s. Uniprot',
             len(reactome_proteins))
    return reactome_proteins
Exemplo n.º 5
0
    def get_gene_ontology_structure(self):
        """
        Loads all of the relations between the GOs that are generalisation of the seedList
         GOs and that are within the types specified in go_namespace_filter

        Starting from self.SeedSet, every GO node reached through outV() on the relation types
        in self._GOUpTypes and self._GORegTypes is visited. For each visited node this fills
        self.GO_Names, self.GO_Legacy_IDs, self.rev_GO_IDs and self.Reachable_nodes_dict, whose
        value is a 4-tuple of de-duplicated id lists:
        (outV via _GOUpTypes, outV via _GORegTypes, inV via _GOUpTypes, inV via _GORegTypes).
        Finally self.All_GOs, self.Num2GO and self.GO2Num are rebuilt from the visited nodes.
        """
        visited_set = set()
        seeds_list = list(self.SeedSet)
        log.info('Starting gene ontology structure retrieval from the set of %s seeds',
                 len(self.SeedSet))

        while seeds_list:
            node_id = seeds_list.pop()
            if node_id in visited_set:
                # The same id can be queued several times before it is first processed;
                # re-processing would only repeat the same database queries.
                continue
            visited_set.add(node_id)

            outgoing_up_ids = []
            outgoing_regulation_ids = []
            incoming_up_ids = []
            incoming_regulation_ids = []

            gene_ontology_node = DatabaseGraph.GOTerm.get(node_id)
            self.GO_Names[node_id] = str(gene_ontology_node.displayName)
            self.GO_Legacy_IDs[node_id] = str(gene_ontology_node.ID)
            self.rev_GO_IDs[str(gene_ontology_node.ID)] = node_id

            for relation_type in chain(self._GOUpTypes, self._GORegTypes):
                is_up_relation = relation_type in self._GOUpTypes

                related_go_nodes = gene_ontology_node.outV(relation_type)
                if not related_go_nodes:
                    # skip in case GO Node has no outgoing relations to other GO nodes
                    # NOTE(review): this also skips the inV pass below for this relation
                    # type - confirm that this is intended.
                    continue

                for go_node in related_go_nodes:
                    if go_node.Namespace not in self.go_namespace_filter:
                        continue
                    node_bulbs_id = get_bulbs_id(go_node)
                    # Only nodes reached through outV are scheduled for a visit
                    if node_bulbs_id not in visited_set:
                        seeds_list.append(node_bulbs_id)
                    if is_up_relation:
                        outgoing_up_ids.append(node_bulbs_id)
                    else:
                        outgoing_regulation_ids.append(node_bulbs_id)

                rev_generator = gene_ontology_node.inV(relation_type)
                if not rev_generator:
                    continue

                for go_node in rev_generator:
                    if go_node.Namespace not in self.go_namespace_filter:
                        continue
                    node_bulbs_id = get_bulbs_id(go_node)
                    if is_up_relation:
                        incoming_up_ids.append(node_bulbs_id)
                    else:
                        incoming_regulation_ids.append(node_bulbs_id)

            self.Reachable_nodes_dict[node_id] = (
                list(set(outgoing_up_ids)),
                list(set(outgoing_regulation_ids)),
                list(set(incoming_up_ids)),
                list(set(incoming_regulation_ids)))

        self.All_GOs = list(visited_set)
        self.Num2GO = dict(enumerate(self.All_GOs))
        self.GO2Num = dict((val, i) for i, val in enumerate(self.All_GOs))
Exemplo n.º 6
0
    def map_rows_to_names(self):
        """
        Maps Node Database IDs, Legacy IDs, display names and types to matrix row/column indexes;

        Every node id in self.Highest_Set gets a consecutive matrix index (both directions are
        stored in self.bulbs_id_2_matrix_index / self.matrix_index_2_bulbs_id) and has its
        display name, element type, legacy ID and, when set, localization recorded. Nodes whose
        element type is "UNIPROT" are collected in self.reached_uniprots_bulbs_id_list and
        self.uniprot_matrix_index_list. The UNIPORT nodes returned by stable_get_all that were
        not reached are then appended to self.all_uniprots_bulbs_id_list together with their
        display name, element type and legacy ID.
        """

        def request_location(_location_buffer_dict, location):
            """
            Just a Buffered lookup of location, since the number of cellular location
            is relatively small (~80), it makes sense to buffer the IOs on it.
            Normally should be moved out as a buffering decorator

            :param _location_buffer_dict: Buffered location
            :param location: location Node Legacy ID we are willing to verify
            :return: displayName of the requested location, or None when the lookup
             yields nothing (that miss is not buffered)
            """
            location = str(location)
            # Test membership on the dict itself rather than on a list of its keys
            if location in _location_buffer_dict:
                return _location_buffer_dict[location]

            generator = DatabaseGraph.Location.index.lookup(ID=location)
            if generator is not None:
                # Only the first match is used
                for elt in generator:
                    display_name = str(elt.displayName)
                    _location_buffer_dict[location] = display_name
                    return display_name
            return None

        #######################################################################

        location_buffer_dict = {}

        self.bulbs_id_2_matrix_index = {}
        self.matrix_index_2_bulbs_id = {}

        log.info('nodes in Highest Set: %s', len(self.Highest_Set))
        for counter, bulbs_node_id in enumerate(self.Highest_Set):
            self.bulbs_id_2_matrix_index[bulbs_node_id] = counter
            self.matrix_index_2_bulbs_id[counter] = bulbs_node_id
            node = DatabaseGraph.vertices.get(bulbs_node_id)
            self.bulbs_id_2_display_name[bulbs_node_id] = node.displayName
            self.bulbs_id2_node_type[bulbs_node_id] = node.element_type
            self.bulbs_id_2_legacy_id[bulbs_node_id] = node.ID
            if node.element_type == "UNIPROT":
                # TODO: there is a problem: there is no UNIPROT nodes reached during the expansion.
                self.reached_uniprots_bulbs_id_list.append(bulbs_node_id)
                self.uniprot_matrix_index_list.append(counter)
            if node.localization is not None:
                self.bulbs_id_2_localization[bulbs_node_id] = request_location(
                    location_buffer_dict, node.localization)

        self.all_uniprots_bulbs_id_list += self.reached_uniprots_bulbs_id_list
        # Built once so that the membership test in the loop below is a set lookup
        # instead of a scan of the whole list for every UNIPORT node
        reached_uniprots = set(self.reached_uniprots_bulbs_id_list)
        self.reached_uniprots_bulbs_id_list = list(reached_uniprots)
        log.info("reached uniprots: %s", len(self.reached_uniprots_bulbs_id_list))

        up_generator = stable_get_all(DatabaseGraph.UNIPORT)
        if up_generator:
            for up_node in up_generator:
                bulbs_node_id = get_bulbs_id(up_node)
                if bulbs_node_id not in reached_uniprots:
                    self.all_uniprots_bulbs_id_list.append(bulbs_node_id)
                    self.bulbs_id_2_display_name[bulbs_node_id] = up_node.displayName
                    self.bulbs_id2_node_type[bulbs_node_id] = up_node.element_type
                    self.bulbs_id_2_legacy_id[bulbs_node_id] = up_node.ID

        self.all_uniprots_bulbs_id_list = list(set(self.all_uniprots_bulbs_id_list))
        log.info("analyzable uniprots: %s", len(self.all_uniprots_bulbs_id_list))