示例#1
0
文件: ssm.py 项目: sdyz5210/IHP
def simgic_hindex(id1, id2, h=4):
    cur = db.cursor()
    cur.execute(
        """SELECT ( 
            SELECT SUM(y.rel_info)
            FROM  ( 
                SELECT DISTINCT f.term_id, f.rel_info 
                FROM graph_path p1, graph_path p2, SSM_TermDesc f 
                WHERE p1.term2_id = %s
                AND p2.term2_id = %s 
                AND p1.term1_id=p2.term1_id 
                AND p1.term1_id=f.term_id 
                AND f.hindex >= %s)
            AS y )
         /( 
            SELECT SUM(x.rel_info)   
            FROM (  
                SELECT f1.term_id, f1.rel_info   
                FROM graph_path p1, SSM_TermDesc f1 
                WHERE p1.term2_id  = %s
                AND p1.term1_id = f1.term_id
                AND f1.hindex >= %s
                UNION  
                SELECT f2.term_id, f2.rel_info   
                FROM graph_path p2, SSM_TermDesc f2
                WHERE p2.term2_id = %s
                AND p2.term1_id = f2.term_id
                AND f2.hindex >= %s) 
            AS x )""", (id1, id2, h, id1, h, id2, h))
    res = cur.fetchone()[0]
    if res is None:
        res = '0'
    return float(res)
示例#2
0
def simui(id1, id2):
    cur = db.cursor()
    cur.execute("""SELECT ( 
			SELECT COUNT(y.rel_info)
			FROM  ( 
				SELECT DISTINCT f.term_id, f.rel_info 
				FROM graph_path p1, graph_path p2, SSM_TermDesc f 
				WHERE p1.term2_id = %s
				AND p2.term2_id = %s 
				AND p1.term1_id=p2.term1_id 
				AND p1.term1_id=f.term_id)
			AS y )
		 /( 
		 	SELECT COUNT(x.rel_info)   
		 	FROM (  
                SELECT f1.term_id, f1.rel_info   
                FROM graph_path p1, SSM_TermDesc f1 
                WHERE p1.term2_id  = %s
                AND p1.term1_id = f1.term_id
                UNION  
                SELECT f2.term_id, f2.rel_info   
                FROM graph_path p2, SSM_TermDesc f2
                WHERE p2.term2_id = %s 
                AND p2.term1_id = f2.term_id) 
		 	AS x )""", (id1,id2,id1,id2))
    res = cur.fetchone()[0]
    if res is None:
        res = '0'
    return float(res)
示例#3
0
文件: ssm.py 项目: sdyz5210/IHP
def simui(id1, id2):
    cur = db.cursor()
    cur.execute(
        """SELECT ( 
			SELECT COUNT(y.rel_info)
			FROM  ( 
				SELECT DISTINCT f.term_id, f.rel_info 
				FROM graph_path p1, graph_path p2, SSM_TermDesc f 
				WHERE p1.term2_id = %s
				AND p2.term2_id = %s 
				AND p1.term1_id=p2.term1_id 
				AND p1.term1_id=f.term_id)
			AS y )
		 /( 
		 	SELECT COUNT(x.rel_info)   
		 	FROM (  
                SELECT f1.term_id, f1.rel_info   
                FROM graph_path p1, SSM_TermDesc f1 
                WHERE p1.term2_id  = %s
                AND p1.term1_id = f1.term_id
                UNION  
                SELECT f2.term_id, f2.rel_info   
                FROM graph_path p2, SSM_TermDesc f2
                WHERE p2.term2_id = %s 
                AND p2.term1_id = f2.term_id) 
		 	AS x )""", (id1, id2, id1, id2))
    res = cur.fetchone()[0]
    if res is None:
        res = '0'
    return float(res)
示例#4
0
文件: ssm.py 项目: lasigeBioTM/IBEnt
def harmonicmeanIC(id1, id2):
    if id1 == 0 or id2 == 0:
        return 0
    cur = db.cursor()
    cur.execute("""SELECT f.rel_info 
				FROM SSM_TermDesc f 
				WHERE f.term_id = %s""", (id1,))
    ic1 = float(cur.fetchone()[0])
    cur = db.cursor()
    cur.execute("""SELECT f.rel_info 
				FROM SSM_TermDesc f 
				WHERE f.term_id = %s""", (id2,))
    ic2 = float(cur.fetchone()[0])
    if ic2 == 0 or ic2 == 0:
        return 0
    return (2*ic1*ic2)/(ic1+ic2)
示例#5
0
def simgic_hindex(id1, id2, h=4):
    cur = db.cursor()
    cur.execute("""SELECT ( 
            SELECT SUM(y.rel_info)
            FROM  ( 
                SELECT DISTINCT f.term_id, f.rel_info 
                FROM graph_path p1, graph_path p2, SSM_TermDesc f 
                WHERE p1.term2_id = %s
                AND p2.term2_id = %s 
                AND p1.term1_id=p2.term1_id 
                AND p1.term1_id=f.term_id 
                AND f.hindex >= %s)
            AS y )
         /( 
            SELECT SUM(x.rel_info)   
            FROM (  
                SELECT f1.term_id, f1.rel_info   
                FROM graph_path p1, SSM_TermDesc f1 
                WHERE p1.term2_id  = %s
                AND p1.term1_id = f1.term_id
                AND f1.hindex >= %s
                UNION  
                SELECT f2.term_id, f2.rel_info   
                FROM graph_path p2, SSM_TermDesc f2
                WHERE p2.term2_id = %s
                AND p2.term1_id = f2.term_id
                AND f2.hindex >= %s) 
            AS x )""", (id1,id2,h,id1,h,id2,h))
    res = cur.fetchone()[0]
    if res is None:
        res = '0'
    return float(res)
示例#6
0
def get_IC():
    cur = db.cursor()
    # check for exact match
    query = """SELECT distinct term_id, rel_info, hindex_info, seco_info
               FROM SSM_TermDesc"""
    # print "QUERY", query
    cur.execute(query)

    res = cur.fetchall()
    return zip(*res)
示例#7
0
def get_IC():
    cur = db.cursor()
    # check for exact match
    query = """SELECT distinct term_id, rel_info, hindex_info, seco_info
               FROM SSM_TermDesc"""
    # print "QUERY", query
    cur.execute(query)

    res = cur.fetchall()
    return zip(*res)
示例#8
0
def check_dist_between(cid1, cid2):
    cur = db.cursor()
    query = """SELECT distance 
               FROM graph_path 
               WHERE term1_id = %s and term2_id = %s""" % (cid1, cid2)
    cur.execute(query)
    res = cur.fetchone()
    if res is None:
        dist = -1
    else:
        dist = int(res[0])
    return dist
示例#9
0
def check_dist_between(cid1, cid2):
    cur = db.cursor()
    query = """SELECT distance 
               FROM graph_path 
               WHERE term1_id = %s and term2_id = %s""" % (cid1, cid2)
    cur.execute(query)
    res = cur.fetchone()
    if res is None:
        dist = -1
    else:
        dist = int(res[0])
    return dist
示例#10
0
def get_description(id):
    cur = db.cursor()
    query = """SELECT term_definition
           FROM term_definition
           WHERE term_id = %s""" % id
    cur.execute(query)
    res = cur.fetchone()

    if res is not None:
        return res[0]
    else:
        return "NA"
示例#11
0
def get_description(id):
    cur = db.cursor()
    query = """SELECT term_definition
           FROM term_definition
           WHERE term_id = %s""" % id
    cur.execute(query)
    res = cur.fetchone()

    if res is not None:
        return res[0]
    else:
        return "NA"
示例#12
0
文件: ssm.py 项目: lasigeBioTM/IBEnt
def termsInCommon(id1, id2):
    cur = db.cursor()
    cur.execute("""SELECT COUNT(i.rel_info) 
		FROM graph_path p1, graph_path p2, SSM_TermDesc i 
		WHERE p1.term2_id = %s
		AND p2.term2_id = %s 
		AND p1.term1_id = p2.term1_id 
		AND p1.term1_id = i.term_id;""", (id1, id2))
    res = cur.fetchone()[0]
    if res is None:
        res = '0'
    return int(res)
示例#13
0
文件: ssm.py 项目: lasigeBioTM/IBEnt
def resnik(id1, id2):
    cur = db.cursor()
    cur.execute("""SELECT MAX(i.rel_info) 
		FROM graph_path p1, graph_path p2, SSM_TermDesc i 
		WHERE p1.term2_id = %s
		AND p2.term2_id = %s 
		AND p1.term1_id = p2.term1_id 
		AND p1.term1_id = i.term_id;""", (id1, id2))
    #r = cur.store_result()
    res = cur.fetchone()[0]
    #print id1, id2, res
    if res is None:
        res = '0'
    return float(res)
示例#14
0
def load_synonyms():
    syns = []
    cur = db.cursor()
    query = """SELECT id, name
           FROM term """
    cur.execute(query)
    ids = cur.fetchall()
    for i in ids:
        print "getting synonyms for" + i[1].lower() + '(' + str(i[0]) + ')',
        synset = set()
        synset.add(i[1].lower())
        query = """SELECT term_synonym
           FROM term_synonym
           WHERE term_id = %s""" % i[0]
        cur.execute(query)
        names = cur.fetchall()
        print len(names)
        for name in names:
            #print name[0], 
            synset.add(name[0].lower())
        syns.append(synset)
    pickle.dump(syns, open("data/chebi_synonyms.pickle", 'wb'))
    print "done"
示例#15
0
def load_synonyms():
    syns = []
    cur = db.cursor()
    query = """SELECT id, name
           FROM term """
    cur.execute(query)
    ids = cur.fetchall()
    for i in ids:
        print "getting synonyms for" + i[1].lower() + '(' + str(i[0]) + ')',
        synset = set()
        synset.add(i[1].lower())
        query = """SELECT term_synonym
           FROM term_synonym
           WHERE term_id = %s""" % i[0]
        cur.execute(query)
        names = cur.fetchall()
        print len(names)
        for name in names:
            #print name[0],
            synset.add(name[0].lower())
        syns.append(synset)
    pickle.dump(syns, open("data/chebi_synonyms.pickle", 'wb'))
    print "done"
示例#16
0
def find_chebi_term(term, adjust=0):
    ''' returns tuple (chebiID, chebiTerm, score)
        if resolution fails, return ('0', 'null', 0.0)
    '''
    # print "TERM", term
    term = MySQLdb.escape_string(term)
    # adjust - adjust the final score
    match = ()
    cur = db.cursor()
    # check for exact match
    query = """SELECT distinct id, name
                   FROM term a 
                   WHERE name =%s and LENGTH(a.name)>0 and star=3;"""
    # print "QUERY", query
    cur.execute(query, (term,))

    res = cur.fetchone()
    if res is not None:
        # print "1"
        score = 1.0 + adjust
        match = (str(res[0]), res[1], score)
    else:
        # synonyms
        cur.execute("""SELECT a.term_id, a.term_synonym, b.name
                       FROM term_synonym a, term b
                       WHERE a.term_synonym=%s
                        and b.id=a.term_id
                        and LENGTH(a.term_synonym)>0
                        and star=3;""", (term,))
        res = cur.fetchone()
        if res is not None:
            # print "2"
            score = 0.8 + adjust
            match = (str(res[0]), res[2], score)

        else:
            # plural - tb pode ser recursivo
            if len(term) > 0 and term[-1] == 's':
                match = find_chebi_term(term[:-1], -0.1)
#
    if not match:
        #(1)H -> hydrogen-1
        terms = re.sub(r'[\(|\)|\[|\]| ]', ' ',  term)
        termlist = terms.strip().split(" ")
        if len(termlist) == 2:
            if termlist[0].isdigit() and termlist[1] in element_base:
                match = find_chebi_term(element_base[termlist[1]][0] + "-" + termlist[0], -0.1)
            if termlist[1] == '+' and termlist[0] in element_base:
                match = find_chebi_term(element_base[termlist[0]][0] + ' cation', -0.1)
            if termlist[1] == '-' and termlist[0] in element_base:
                match = find_chebi_term(element_base[termlist[0]][0] + ' anion', -0.1)
            # if match != '':
            #    print term, match

    if not match:
        # partial match

        terms = '("' + '","'.join(term.split(" ")) + '")'
        query = """ 
        SELECT ((sum(d.ic)/ec)-0.1) as score, e.name, c.term_id, c.id,
                group_concat(d.word separator ','), count(d.id), c.descriptor_type
        FROM term e JOIN descriptor3 c ON(c.term_id=e.id) JOIN word2term3 b ON (b.descriptor_id=c.id) 
             JOIN word3 d ON (d.id=b.word_id) JOIN SSM_TermDesc f ON (e.id=f.term_id)
        WHERE b.word_id IN
           (SELECT distinct id
            FROM word3
            WHERE word in %s)
        GROUP by c.id 
        ORDER by score desc 
        LIMIT 3;""" % (terms,)
        # print "QUERY3", query, adjust
        cur.execute(query)
        res = cur.fetchone()

        if res is not None:
            # print "3"
            match = (str(res[3]), res[1], float(res[0]))
            # print term, match

    if not match or match[2] < 0.0:
        match = ('0', 'null', 0.0)

    return match
示例#17
0
def find_chebi_term(term, adjust=0):
    ''' returns tuple (chebiID, chebiTerm, score)
        if resolution fails, return ('0', 'null', 0.0)
    '''
    # print "TERM", term
    term = MySQLdb.escape_string(term)
    # adjust - adjust the final score
    match = ()
    cur = db.cursor()
    # check for exact match
    query = """SELECT distinct id, name
                   FROM term a 
                   WHERE name =%s and LENGTH(a.name)>0 and star=3;"""
    # print "QUERY", query
    cur.execute(query, (term, ))

    res = cur.fetchone()
    if res is not None:
        # print "1"
        score = 1.0 + adjust
        match = (str(res[0]), res[1], score)
    else:
        # synonyms
        cur.execute(
            """SELECT a.term_id, a.term_synonym, b.name
                       FROM term_synonym a, term b
                       WHERE a.term_synonym=%s
                        and b.id=a.term_id
                        and LENGTH(a.term_synonym)>0
                        and star=3;""", (term, ))
        res = cur.fetchone()
        if res is not None:
            # print "2"
            score = 0.8 + adjust
            match = (str(res[0]), res[2], score)

        else:
            # plural - tb pode ser recursivo
            if len(term) > 0 and term[-1] == 's':
                match = find_chebi_term(term[:-1], -0.1)


#
    if not match:
        #(1)H -> hydrogen-1
        terms = re.sub(r'[\(|\)|\[|\]| ]', ' ', term)
        termlist = terms.strip().split(" ")
        if len(termlist) == 2:
            if termlist[0].isdigit() and termlist[1] in element_base:
                match = find_chebi_term(
                    element_base[termlist[1]][0] + "-" + termlist[0], -0.1)
            if termlist[1] == '+' and termlist[0] in element_base:
                match = find_chebi_term(
                    element_base[termlist[0]][0] + ' cation', -0.1)
            if termlist[1] == '-' and termlist[0] in element_base:
                match = find_chebi_term(
                    element_base[termlist[0]][0] + ' anion', -0.1)
            # if match != '':
            #    print term, match

    if not match:
        # partial match

        terms = '("' + '","'.join(term.split(" ")) + '")'
        query = """ 
        SELECT ((sum(d.ic)/ec)-0.1) as score, e.name, c.term_id, c.id,
                group_concat(d.word separator ','), count(d.id), c.descriptor_type
        FROM term e JOIN descriptor3 c ON(c.term_id=e.id) JOIN word2term3 b ON (b.descriptor_id=c.id) 
             JOIN word3 d ON (d.id=b.word_id) JOIN SSM_TermDesc f ON (e.id=f.term_id)
        WHERE b.word_id IN
           (SELECT distinct id
            FROM word3
            WHERE word in %s)
        GROUP by c.id 
        ORDER by score desc 
        LIMIT 3;""" % (terms, )
        # print "QUERY3", query, adjust
        cur.execute(query)
        res = cur.fetchone()

        if res is not None:
            # print "3"
            match = (str(res[3]), res[1], float(res[0]))
            # print term, match

    if not match or match[2] < 0.0:
        match = ('0', 'null', 0.0)

    return match