Пример #1
0
    def scores(self, id1, id2):
        """Returns the score between two given GO terms.

        The score is the sum of the semantic values that both terms give
        to the ancestors they share, divided by the sum of all the
        semantic values of both terms. It is always returned as a float.

        Unknown identifiers fail on the ``self.goterms[...]`` lookup
        (KeyError for a dict). A ZeroDivisionError is raised if
        semantic_values() returns no value at all for both terms.

        :arg id1, identifier of a GO term (ie: GO:0043231, or whatever
            identifier is in your ontology).
        :arg id2, identifier of a GO term (ie: GO:0043229, or whatever
            identifier is in your ontology).
        """
        # Look each term up and use its own 'id' entry, which may differ
        # from the key that was used to find it.
        # semantic_values() already walks the paths of the term and
        # returns one entry per ancestor, so its keys are the ancestors;
        # there is no need to compute the paths a second time here.
        values1 = self.semantic_values(self.goterms[id1]['id'])
        values2 = self.semantic_values(self.goterms[id2]['id'])

        # Start from a float so the division below is never an integer
        # division, even when every semantic value is the integer 1.
        shared_total = 0.0
        for ancester in set(values1).intersection(values2):
            shared_total += values2[ancester] + values1[ancester]

        return shared_total / (sum(values1.values()) +
                               sum(values2.values()))
Пример #2
0
    def semantic_values(self, id1):
        """ Returns the semantic values of a given term and of all its
        parents, as a dict mapping each identifier to its value.

        The term itself gets the value 1. Every other ancestor gets the
        highest value found over all the paths containing it. The value
        of one path is the product of one weight per relation found
        between the term and the ancestor: 0.8 for 'is_a', 0.6 for
        'part_of' and 1 for anything else.

        :arg id1, identifier of a GO term (ie: GO:0043231, or whatever
            identifier is in your ontology).
        """
        # NOTE(review): 0.8 / 0.6 look like the weights of Wang et al.
        # (2007); confirm before changing them.
        weights = {'is_a': 0.8, 'part_of': 0.6}

        golib = PyGoLib(self.goterms)
        goterm1 = self.goterms[id1]
        path1 = golib.get_path(goterm1, pred=goterm1['id'], paths=[],
                               details=True)
        ancesters = _get_all_ancesters(path1)
        # Split every path once instead of once per ancestor.
        split_paths = [item.split(',') for item in path1]

        semantic_values = {}
        for ancester in ancesters:
            if ancester == goterm1['id']:
                semantic_values[ancester] = 1
                continue
            tmp_score = []
            for path_el in split_paths:
                # Look for the ancestor among the path elements and not
                # in the raw string: an identifier that is only part of
                # another one would match there and make index() fail.
                if ancester not in path_el:
                    continue
                ind = path_el.index(ancester)
                tmp_cnt = 1
                # Walk back from the ancestor to the term, reading every
                # second element as the name of a relation.
                for step in range(ind - 1, 0, -2):
                    tmp_cnt = tmp_cnt * weights.get(path_el[step], 1)
                tmp_score.append(tmp_cnt)
            semantic_values[ancester] = max(tmp_score)
        return semantic_values
Пример #3
0
    def semantic_values(self, id1):
        """ Returns the semantic values of a given term and of all its
        parents, as a dict mapping each identifier to its value.

        The term itself gets the value 1. Every other ancestor gets the
        highest value found over all the paths containing it. The value
        of one path is the product of one weight per relation found
        between the term and the ancestor: 0.8 for 'is_a', 0.6 for
        'part_of' and 1 for anything else.

        :arg id1, identifier of a GO term (ie: GO:0043231, or whatever
            identifier is in your ontology).
        """
        # NOTE(review): 0.8 / 0.6 look like the weights of Wang et al.
        # (2007); confirm before changing them.
        weights = {'is_a': 0.8, 'part_of': 0.6}

        golib = PyGoLib(self.goterms)
        goterm1 = self.goterms[id1]
        path1 = golib.get_path(goterm1,
                               pred=goterm1['id'],
                               paths=[],
                               details=True)
        ancesters = _get_all_ancesters(path1)
        # Split every path once instead of once per ancestor.
        split_paths = [item.split(',') for item in path1]

        semantic_values = {}
        for ancester in ancesters:
            if ancester == goterm1['id']:
                semantic_values[ancester] = 1
                continue
            tmp_score = []
            for path_el in split_paths:
                # Look for the ancestor among the path elements and not
                # in the raw string: an identifier that is only part of
                # another one would match there and make index() fail.
                if ancester not in path_el:
                    continue
                ind = path_el.index(ancester)
                tmp_cnt = 1
                # Walk back from the ancestor to the term, reading every
                # second element as the name of a relation.
                for step in range(ind - 1, 0, -2):
                    tmp_cnt = tmp_cnt * weights.get(path_el[step], 1)
                tmp_score.append(tmp_cnt)
            semantic_values[ancester] = max(tmp_score)
        return semantic_values
Пример #4
0
 def scores(self, id1, id2):
     """Compute the score between two GO terms.

     When one term is found on a path of the other, the smallest value
     returned by __score_parents() is returned twice, as a tuple.
     Otherwise the result of __score_cousins() is returned unchanged.

     :arg id1, identifier of a GO term (ie: GO:0043231, or whatever
         identifier is in your ontology).
     :arg id2, identifier of a GO term (ie: GO:0043229, or whatever
         identifier is in your ontology).
     """
     lib = PyGoLib(self.goterms)
     term_a = self.goterms[id1]
     paths_a = lib.get_path(term_a, pred=term_a['id'], paths=[])
     term_b = self.goterms[id2]
     paths_b = lib.get_path(term_b, pred=term_b['id'], paths=[])

     # The identifiers handed over may be alt_id entries, which exist in
     # the list of goterms but not in the paths; the 'id' stored on the
     # term is the regular identifier, so that one is used from here on.
     canonical_a = term_a['id']
     canonical_b = term_b['id']

     parent_scores = self.__score_parents(canonical_a, canonical_b,
                                          paths_a, paths_b)
     if not parent_scores:
         return self.__score_cousins(canonical_a, canonical_b,
                                     paths_a, paths_b)

     closest = min(parent_scores)
     self.log.debug("%s and %s are parents" % (id1, id2))
     return (closest, closest)
Пример #5
0
 def __init__(self, data=None):
     """ Constructor.

     Stores the ontology graph (an empty dict when none is given), a
     logger obtained from get_logger() and a PyGoLib built on the graph.

     :arg data, the graph of ontologies
     """
     self.goterms = {} if data is None else data
     self.log = get_logger()
     self.pygo = PyGoLib(self.goterms)
Пример #6
0
    def scores(self, id1, id2):
        """Returns the score between two given GO terms.

        The score is the sum of the semantic values that both terms give
        to the ancestors they share, divided by the sum of all the
        semantic values of both terms. It is always returned as a float.

        Unknown identifiers fail on the ``self.goterms[...]`` lookup
        (KeyError for a dict). A ZeroDivisionError is raised if
        semantic_values() returns no value at all for both terms.

        :arg id1, identifier of a GO term (ie: GO:0043231, or whatever
            identifier is in your ontology).
        :arg id2, identifier of a GO term (ie: GO:0043229, or whatever
            identifier is in your ontology).
        """
        # Look each term up and use its own 'id' entry, which may differ
        # from the key that was used to find it.
        # semantic_values() already walks the paths of the term and
        # returns one entry per ancestor, so its keys are the ancestors;
        # there is no need to compute the paths a second time here.
        values1 = self.semantic_values(self.goterms[id1]['id'])
        values2 = self.semantic_values(self.goterms[id2]['id'])

        # Start from a float so the division below is never an integer
        # division, even when every semantic value is the integer 1.
        shared_total = 0.0
        for ancester in set(values1).intersection(values2):
            shared_total += values2[ancester] + values1[ancester]

        return shared_total / (sum(values1.values()) +
                               sum(values2.values()))
Пример #7
0
class GoDistanceCounter(object):
    """ This class is the main class of the project to compute the
    distance between two GO terms.
    """
    def __init__(self, data=None):
        """ Constructor.

        Stores the graph (an empty dict when none is given), a logger
        obtained from get_logger() and a PyGoLib built on the graph.

        :arg data, the graph of ontologies
        """
        self.goterms = data
        if self.goterms is None:
            self.goterms = {}
        self.log = get_logger()
        self.pygo = PyGoLib(self.goterms)

    def __score_cousins(self, goid1, goid2, path1=None, path2=None):
        """ For two given GO term ID and the list of their path, return
        the score between them.

        Every path of the first term is compared with every path of the
        second one. For each pair, the element returned by
        _get_ancester() is located in both paths; the sum of its two
        positions is the distance and their absolute difference the
        level difference. The pair with the smallest distance wins (the
        first one found in case of a tie).

        Returns a tuple (distance, level difference), or None when no
        pair of paths yields a common element.

        :arg goid1, GO term ID (ie: GO:0043231, or whatever identifier is
            in your ontology).
        :arg goid2, GO term ID (ie: GO:0043229, or whatever identifier is
            in your ontology).
        :kwarg path1, the list path from the first GO term to the top of
        the tree, as returned by get_path().
        :kwarg path2, the list path from the second GO term to the top
        of the tree, as returned by get_path().
        """
        if path1 is None:
            path1 = self.pygo.get_path(self.goterms[goid1])
        if path2 is None:
            path2 = self.pygo.get_path(self.goterms[goid2])

        mindist = None
        deltalevel = None
        for path in path1:
            step1 = path.split(',')
            for opath in path2:
                step2 = opath.split(',')
                # NOTE(review): _get_ancester() is defined elsewhere; it
                # is expected to return an element present in both lists
                # or something falsy. Confirm against its definition.
                inter = _get_ancester(step1, step2)
                if not inter:
                    continue
                index1 = step1.index(inter)
                index2 = step2.index(inter)
                dist = index1 + index2
                # Compare with None explicitly: 0 is a valid distance
                # and, being falsy, used to be treated as "not set yet"
                # and overwritten by any later, larger distance.
                if mindist is None or dist < mindist:
                    mindist = dist
                    deltalevel = abs(index1 - index2)
        if mindist is None:
            return None
        return (mindist, deltalevel)

    @staticmethod
    def __distances(paths, goid1, goid2, required):
        """ Return, for every path containing ``required``, the absolute
        difference between the positions of goid1 and goid2 in it.
        """
        distances = []
        for path in paths:
            steps = path.split(',')
            if required in steps:
                start = steps.index(goid1)
                stop = steps.index(goid2)
                distances.append(abs(stop - start))
        return distances

    def __score_parents(self, goid1, goid2, path1=None, path2=None):
        """ For two given GO term ID and the list of their path, return
        the score between them if one is parent of the other.

        Returns a list holding one score per path of the first term that
        contains the second term, followed by one score per path of the
        second term that contains the first one. The list is empty when
        neither term appears on a path of the other.

        :arg goid1, GO term ID (ie: GO:0043231, or whatever identifier is
            in your ontology).
        :arg goid2, GO term ID (ie: GO:0043229, or whatever identifier is
            in your ontology).
        :kwarg path1, the list path from the first GO term to the top of
        the tree, as returned by get_path().
        :kwarg path2, the list path from the second GO term to the top
        of the tree, as returned by get_path().
        """
        if path1 is None:
            path1 = self.pygo.get_path(self.goterms[goid1])
        scores = self.__distances(path1, goid1, goid2, goid2)
        if path2 is None:
            path2 = self.pygo.get_path(self.goterms[goid2])
        scores.extend(self.__distances(path2, goid1, goid2, goid1))
        return scores

    def scores(self, id1, id2):
        """Returns the score between two given GO terms.

        When one term is found on a path of the other, the smallest
        score from __score_parents() is returned twice, as a tuple.
        Otherwise the result of __score_cousins() is returned as is,
        which is either a tuple or None.

        :arg id1, identifier of a GO term (ie: GO:0043231, or whatever
            identifier is in your ontology).
        :arg id2, identifier of a GO term (ie: GO:0043229, or whatever
            identifier is in your ontology).
        """
        golib = PyGoLib(self.goterms)
        goterm1 = self.goterms[id1]
        path1 = golib.get_path(goterm1, pred=goterm1['id'], paths=[])
        goterm2 = self.goterms[id2]
        path2 = golib.get_path(goterm2, pred=goterm2['id'], paths=[])
        # We use goterm['id'] instead of the id provided to take into
        # account alt_id which are in the list of goterms but not in the
        # paths. Via goterm['id'] we get the 'normal' GO term identifier.
        scores = self.__score_parents(goterm1['id'], goterm2['id'], path1,
                                      path2)
        if scores:
            score = min(scores)
            self.log.debug("%s and %s are parents" % (id1, id2))
            return (score, score)
        return self.__score_cousins(goterm1['id'], goterm2['id'], path1,
                                    path2)