示例#1
0
# Convert a network in GML format to JSON for web visualization
import json
import networkx as nx
from networkx.readwrite import json_graph

import sys
# Extend the module search path with a hard-coded, machine-specific directory
# before importing `fileIO` (presumably where `fileIO` lives -- confirm).
sys.path.append('/Users/zichen/Documents/bitbucket/maayanlab_utils')

from fileIO import mysqlTable2dict
# Module-level lookup consumed by gml2json: it is indexed with a side-effect
# node label and the value is stored as that node's `xref` (a UMLS id according
# to the inline comment there).
# NOTE(review): presumably the two integers select the key and value columns of
# the `side_effects` table -- confirm against mysqlTable2dict.
d_sename_umls = mysqlTable2dict('sep', 'side_effects', 2, 1)


def gml2json(gml_fn, json_fn):
    G = nx.read_gml(gml_fn)
    print G.number_of_nodes(), G.number_of_edges()
    for node_id in G.nodes():
        node_dict = G.node[node_id]
        if '|' in node_dict['label']:
            sl = node_dict['label'].split('|')
            label = '%s (%s)' % (sl[1], sl[0])
            G.node[node_id]['label'] = label

        # clean labels and add xref
        if node_dict['type'] == "SE":
            G.node[node_id]['type'] = 'triangle-up'
            G.node[node_id]['xref'] = d_sename_umls[node_dict['label']]  # umls
        else:
            sl = G.node[node_id]['label'].split('(')
            xref = sl[-1]
            label = '('.join(sl[0:-1])
            label = label.strip()
示例#2
0
# Input/output locations, all rooted at HOME (defined outside this excerpt).
# NOTE(review): the file name suggests a 20338 x 1053 table of predicted
# probabilities -- confirm against the code that reads it.
PREDICTION_DF = HOME + '/Documents/Zichen_Projects/drug_se_prediction/PTs_RF1000_proba_df_n20338x1053.txt'

## for side effects
# GMT_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/RF1000_GOtCS_AUC_0.7_proba_0.6_prediction_only.gmt'
GMT_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/ET100_GOtCS_AUC_0.76_proba_0.75.gmt' 
GML_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/side_effect_network.gml'
## for drugs
# GMT_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/RF1000_GOtCS_AUC_0.7_proba_0.6_prediction_only_flipped.gmt'
# GML_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/drug_network.gml'

# Sibling paths derived from GML_FN by swapping the extension text.
# str.replace substitutes every occurrence of the pattern, not only a trailing
# suffix, so this relies on '.gml' / '.csv' appearing once in the path.
CSV_FN = GML_FN.replace('.gml', '.csv')
JSON_FN = CSV_FN.replace('.csv', '.json')

## retrieve meta data about SE
# NOTE(review): presumably the two integers passed to mysqlTable2dict select the
# key and value columns, making these two dicts inverse views of the same
# table -- confirm against the helper.
d_umls_pt = mysqlTable2dict('sep', 'side_effects', 1, 2)
d_pt_umls = mysqlTable2dict('sep', 'side_effects', 2, 1)
d_soc_pt = read_gmt(
    HOME+'/Documents/bitbucket/pertid2trainingset/Y_matrix_no_mfc/SOC_to_pt.gmt')
print(len(d_soc_pt))

# Invert the SOC -> PTs mapping into UMLS id -> SOC.
# One PT may have multiple SOCs; only the first SOC encountered for a given
# UMLS id is kept, later ones are ignored.
d_umls_soc = {}
for soc, pts in d_soc_pt.items():
    for pt in pts:
        umls = d_pt_umls[pt]
        if umls is None:
            # No UMLS id stored for this PT: nothing to record.
            continue
        # setdefault only writes when the key is absent (first SOC wins).
        d_umls_soc.setdefault(umls, soc)

# for pt in d_umls_soc:
# 	if len(d_umls_soc[pt]) != 1:
示例#3
0
    if kwargs['umls_id'] in d_umls_soc:
        kwargs['soc'] = d_umls_soc[kwargs['umls_id']]
    else:
        kwargs['soc'] = None
    instance = get_or_create(session, SideEffect, **kwargs)

# For each (row of `mat`, pert_id) pair, keep only the entries whose value in
# the row exceeds 0.5 and hand them to add_predictions.
# NOTE(review): `pvals` presumably holds predicted probabilities and
# `se_names` / `aucs` are arrays aligned with its columns -- confirm.
for pvals, pert_id in zip(mat, pert_ids):
    mask = pvals > 0.5
    # Apply the same boolean mask to all three aligned arrays.
    se_names_pos, aucs_pos, pvals_pos = (
        values[mask].tolist() for values in (se_names, aucs, pvals))
    add_predictions(se_names_pos, aucs_pos, pert_id, pvals_pos, session)

## transfer association tables
# sider_connections
# Lookup dicts; in this section they are used only for key-membership tests.
# NOTE(review): presumably the two integers select key/value columns of the
# named table -- confirm against mysqlTable2dict.
d_pert_ids = mysqlTable2dict('maaya0_SEP', 'drugs_lincs', 0, 1)
d_umls_id = mysqlTable2dict('maaya0_SEP', 'side_effects', 0, 1)

conn = MySQLdb.connect(host='localhost',
                       user='******',
                       passwd='',
                       db='maaya0_SEP')
cur = conn.cursor()
# The table name is a literal constant, so formatting it into the SQL text
# does not involve external input.
query = """SELECT * FROM `%s`""" % 'sider_connections'
cur.execute(query)
# pert_id -> list of umls_ids collected from the rows of sider_connections.
d_pert_umls_ids = {}
for row in cur:
    # Each row is unpacked as exactly two columns.
    pert_id, umls_id = row
    # Only keep pairs whose ids are both keys of the lookup dicts above.
    if pert_id in d_pert_ids and umls_id in d_umls_id:
        if pert_id not in d_pert_umls_ids:
            d_pert_umls_ids[pert_id] = [umls_id]
示例#4
0
		kwargs['soc'] = None
	instance = get_or_create(session, SideEffect, **kwargs)


# For each (row of `mat`, pert_id) pair, keep only the entries whose value in
# the row exceeds 0.5 and hand them to add_predictions.
# NOTE(review): `pvals` presumably holds predicted probabilities and
# `se_names` / `aucs` are arrays aligned with its columns -- confirm.
for pvals, pert_id in zip(mat, pert_ids):
    mask = pvals > 0.5
    # Apply the same boolean mask to all three aligned arrays.
    se_names_pos, aucs_pos, pvals_pos = (
        values[mask].tolist() for values in (se_names, aucs, pvals))
    add_predictions(se_names_pos, aucs_pos, pert_id, pvals_pos, session)



## transfer association tables
# sider_connections
# Lookup dicts; in this section they are used only for key-membership tests.
# NOTE(review): presumably the two integers select key/value columns of the
# named table -- confirm against mysqlTable2dict.
d_pert_ids = mysqlTable2dict('maaya0_SEP', 'drugs_lincs', 0, 1)
d_umls_id = mysqlTable2dict('maaya0_SEP', 'side_effects', 0, 1)

conn = MySQLdb.connect(
    host='localhost',
    user='******',
    passwd='',
    db='maaya0_SEP')
cur = conn.cursor()
# The table name is a literal constant, so formatting it into the SQL text
# does not involve external input.
query = """SELECT * FROM `%s`""" % 'sider_connections'
cur.execute(query)

# Group the rows into pert_id -> list of distinct umls_ids, in the order the
# cursor yields them.
d_pert_umls_ids = {}
for row in cur:
    pert_id, umls_id = row
    # Skip pairs that reference an id missing from either lookup dict.
    if pert_id not in d_pert_ids or umls_id not in d_umls_id:
        continue
    # setdefault creates the empty list on first sight of a pert_id; the
    # membership test keeps each umls_id at most once per pert_id.
    if umls_id not in d_pert_umls_ids.setdefault(pert_id, []):
        d_pert_umls_ids[pert_id].append(umls_id)
示例#5
0
# Convert a network in GML format to JSON for web visualization
import json
import networkx as nx
from networkx.readwrite import json_graph

import sys
# Extend the module search path with a hard-coded, machine-specific directory
# before importing `fileIO` (presumably where `fileIO` lives -- confirm).
sys.path.append('/Users/zichen/Documents/bitbucket/maayanlab_utils')

from fileIO import mysqlTable2dict
# Module-level lookup consumed by gml2json: it is indexed with a side-effect
# node label and the value is stored as that node's `xref` (a UMLS id according
# to the inline comment there).
# NOTE(review): presumably the two integers select the key and value columns of
# the `side_effects` table -- confirm against mysqlTable2dict.
d_sename_umls = mysqlTable2dict('sep', 'side_effects', 2,1)

def gml2json(gml_fn, json_fn):
	G = nx.read_gml(gml_fn)
	print G.number_of_nodes(), G.number_of_edges()
	for node_id in G.nodes():
		node_dict = G.node[node_id]
		if '|' in node_dict['label']:
			sl = node_dict['label'].split('|')
			label = '%s (%s)'%( sl[1], sl[0] )
			G.node[node_id]['label'] = label
			
		# clean labels and add xref
		if node_dict['type'] == "SE":
			G.node[node_id]['type'] = 'triangle-up'
			G.node[node_id]['xref'] = d_sename_umls[node_dict['label']] # umls
		else:
			sl = G.node[node_id]['label'].split('(')
			xref = sl[-1]
			label = '('.join(sl[0:-1])
			label = label.strip()
			xref  = xref.strip(')')
示例#6
0
# Input/output locations, all rooted at HOME (defined outside this excerpt).
# NOTE(review): the file name suggests a 20338 x 1053 table of predicted
# probabilities -- confirm against the code that reads it.
PREDICTION_DF = HOME + '/Documents/Zichen_Projects/drug_se_prediction/PTs_RF1000_proba_df_n20338x1053.txt'

## for side effects
# GMT_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/RF1000_GOtCS_AUC_0.7_proba_0.6_prediction_only.gmt'
GMT_FN = HOME + '/Documents/Zichen_Projects/drug_se_prediction/ET100_GOtCS_AUC_0.76_proba_0.75.gmt'
GML_FN = HOME + '/Documents/Zichen_Projects/drug_se_prediction/side_effect_network.gml'
## for drugs
# GMT_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/RF1000_GOtCS_AUC_0.7_proba_0.6_prediction_only_flipped.gmt'
# GML_FN = HOME+'/Documents/Zichen_Projects/drug_se_prediction/drug_network.gml'

# Sibling paths derived from GML_FN by swapping the extension text.
# str.replace substitutes every occurrence of the pattern, not only a trailing
# suffix, so this relies on '.gml' / '.csv' appearing once in the path.
CSV_FN = GML_FN.replace('.gml', '.csv')
JSON_FN = CSV_FN.replace('.csv', '.json')

## retrieve meta data about SE
# NOTE(review): presumably the two integers passed to mysqlTable2dict select the
# key and value columns, making these two dicts inverse views of the same
# table -- confirm against the helper.
d_umls_pt = mysqlTable2dict('sep', 'side_effects', 1, 2)
d_pt_umls = mysqlTable2dict('sep', 'side_effects', 2, 1)
soc_gmt_path = HOME + '/Documents/bitbucket/pertid2trainingset/Y_matrix_no_mfc/SOC_to_pt.gmt'
d_soc_pt = read_gmt(soc_gmt_path)
print(len(d_soc_pt))

# Invert the SOC -> PTs mapping into UMLS id -> SOC.
# One PT may have multiple SOCs; only the first SOC encountered for a given
# UMLS id is kept, later ones are ignored.
d_umls_soc = {}
for soc, pts in d_soc_pt.items():
    for pt in pts:
        umls = d_pt_umls[pt]
        if umls is None:
            # No UMLS id stored for this PT: nothing to record.
            continue
        # setdefault only writes when the key is absent (first SOC wins).
        d_umls_soc.setdefault(umls, soc)