def main():
    """Cluster an example TSV network file and write the multi-view JSON.

    Reads ``txt/example_tsv_network.txt``, builds filtered views using a
    cosine distance metric with average linkage, writes the result to
    ``json/mult_view.json``, and prints the elapsed wall-clock time.
    """
    import time
    start_time = time.time()

    # io.StringIO replaces the Python-2-only StringIO module
    import io

    # import network class from Network.py
    from clustergrammer import Network

    net = Network()

    # load data to dataframe
    # net.load_tsv_to_net('txt/example_tsv_network.txt')
    # net.load_tsv_to_net('txt/mat_1mb.txt')

    # choose file
    ################
    # file_buffer = open('txt/col_categories.txt')
    # context manager ensures the file handle is always closed
    with open('txt/example_tsv_network.txt') as file_buffer:
        buff = io.StringIO(file_buffer.read())

    net.pandas_load_tsv_to_net(buff)

    # filter rows
    views = ['filter_row_sum', 'N_row_sum']

    # distance metric
    dist_type = 'cosine'

    # linkage type
    linkage_type = 'average'

    net.make_filtered_views(dist_type=dist_type, views=views,
                            calc_col_cats=True, linkage_type=linkage_type)

    net.write_json_to_file('viz', 'json/mult_view.json', 'no-indent')

    elapsed_time = time.time() - start_time
    print('\n\n\nelapsed time: ' + str(elapsed_time))
def enrichr_clust_from_response(response_list):
    """Cluster Enrichr enrichment results into a clustergrammer Network.

    Keeps terms with a positive combined score, ranks terms by combined
    score, -log(p-value), and negated z-score, builds a binary gene-by-term
    association matrix from the union of each ranking's top-10 terms, and
    attaches one extra set of views per score type to the returned Network.

    Parameters
    ----------
    response_list : list
        Raw Enrichr response; converted to enrichment dicts by
        ``transfer_to_enr_dict`` (defined elsewhere in this module).

    Returns
    -------
    Network
        Clustered network with per-score-type views appended.
    """
    import math
    from copy import deepcopy

    import numpy as np
    import pandas as pd

    from clustergrammer import Network

    print('\nenrichr_clust_from_response\n')

    ini_enr = transfer_to_enr_dict(response_list)

    enr = []
    scores = {}
    score_types = ['combined_score', 'pval', 'zscore']

    # one Series of term -> score per score type
    # (explicit dtype avoids the empty-Series object-dtype warning)
    for inst_score_type in score_types:
        scores[inst_score_type] = pd.Series(dtype=float)

    for inst_enr in ini_enr:
        if inst_enr['combined_score'] > 0:

            # make series of enriched terms with scores
            for inst_score_type in score_types:

                # collect the scores of the enriched terms
                if inst_score_type == 'combined_score':
                    scores[inst_score_type][inst_enr['name']] = \
                        inst_enr[inst_score_type]
                if inst_score_type == 'pval':
                    # convert p-value to a "bigger is better" score
                    scores[inst_score_type][inst_enr['name']] = \
                        -math.log(inst_enr[inst_score_type])
                if inst_score_type == 'zscore':
                    # Enrichr z-scores are negative for enrichment; flip sign
                    scores[inst_score_type][inst_enr['name']] = \
                        -inst_enr[inst_score_type]

            # keep enrichment values
            enr.append(inst_enr)

    # sort and normalize the scores
    for inst_score_type in score_types:
        scores[inst_score_type] = \
            scores[inst_score_type] / scores[inst_score_type].max()
        # Series.sort() was removed from pandas; sort_values is the
        # equivalent descending sort
        scores[inst_score_type] = \
            scores[inst_score_type].sort_values(ascending=False)

    # gather lists of top scores
    top_terms = {}
    top_terms['combined_score'] = scores['combined_score'].index.tolist()[:10]
    top_terms['pval'] = scores['pval'].index.tolist()[:10]
    top_terms['zscore'] = scores['zscore'].index.tolist()[:10]

    print('\ncombined_score')
    print(scores['combined_score'][:10])
    print('\npval')
    print(scores['pval'][:10])
    print('\nzscore')
    print(scores['zscore'][:10])

    # gather the terms that should be kept - they are at the top of the
    # score list
    keep_terms = top_terms['combined_score'] + \
        top_terms['pval'] + top_terms['zscore']
    keep_terms = list(set(keep_terms))

    # keep enriched terms that are in the top 10 based on at least one score
    keep_enr = [inst_enr for inst_enr in enr
                if inst_enr['name'] in keep_terms]

    # fill in full matrix
    #######################

    row_node_names = []  # genes
    col_node_names = []  # enriched terms

    # gather information from the list of enriched terms
    for inst_enr in keep_enr:
        col_node_names.append(inst_enr['name'])
        row_node_names.extend(inst_enr['int_genes'])

    row_node_names = sorted(list(set(row_node_names)))

    net = Network()
    net.dat['nodes']['row'] = row_node_names
    net.dat['nodes']['col'] = col_node_names
    # scipy.zeros was removed from SciPy; use numpy directly
    net.dat['mat'] = np.zeros([len(row_node_names), len(col_node_names)])

    for inst_enr in keep_enr:

        inst_term = inst_enr['name']
        col_index = col_node_names.index(inst_term)

        # use combined score for full matrix - will not be seen in viz
        tmp_score = scores['combined_score'][inst_term]
        net.dat['node_info']['col']['value'].append(tmp_score)

        for inst_gene in inst_enr['int_genes']:
            row_index = row_node_names.index(inst_gene)
            # save association
            net.dat['mat'][row_index, col_index] = 1

    # cluster full matrix
    #############################
    # do not make multiple views
    views = ['']

    print('\n\n\n')
    print('net nodes')
    print(net.dat['nodes']['row'])
    print('\n\n\n')

    # clustering requires more than one row
    if len(net.dat['nodes']['row']) > 1:
        net.make_filtered_views(dist_type='jaccard', views=views,
                                dendro=False)
    else:
        net.make_filtered_views(dist_type='jaccard', views=views,
                                dendro=False, run_clustering=False)

    # get dataframe from full matrix
    df = net.dat_to_df()

    for inst_score_type in score_types:

        inst_df = deepcopy(df)
        # deepcopy of a freshly constructed Network is pointless;
        # just build a new one
        inst_net = Network()

        # restrict columns to this score type's top terms
        inst_df['mat'] = inst_df['mat'][top_terms[inst_score_type]]

        print('\n\n' + inst_score_type)
        print(inst_df['mat'].shape)
        print(top_terms[inst_score_type])

        # load back into net
        inst_net.df_to_dat(inst_df)

        # make views
        if len(net.dat['nodes']['row']) > 1:
            inst_net.make_filtered_views(dist_type='jaccard',
                                         views=['N_row_sum'], dendro=False)
        else:
            inst_net.make_filtered_views(dist_type='jaccard',
                                         views=['N_row_sum'], dendro=False,
                                         run_clustering=False)

        inst_views = inst_net.viz['views']

        # add score_type to views
        for inst_view in inst_views:

            inst_view['enr_score_type'] = inst_score_type

            # add values to col_nodes and order according to rank
            for inst_col in inst_view['nodes']['col_nodes']:

                inst_col['rank'] = len(top_terms[inst_score_type]) - \
                    top_terms[inst_score_type].index(inst_col['name'])

                inst_name = inst_col['name']
                inst_col['value'] = scores[inst_score_type][inst_name]

        # add views to main network
        net.viz['views'].extend(inst_views)

    return net
def main(mongo_address, viz_id, vect_post):
    """Cluster a G2E vector payload and save the visualization to MongoDB.

    Connects to the ``clustergrammer`` database at *mongo_address*, clusters
    *vect_post* with the clustergrammer Network class, stores the raw data
    in ``network_data``, and updates the placeholder ``networks`` document
    *viz_id* with the resulting viz (or ``'error'`` on failure).

    Parameters
    ----------
    mongo_address : str
        MongoDB connection address.
    viz_id : str
        Hex string id of the placeholder ``networks`` document.
    vect_post : dict
        Vector-format network payload to cluster.
    """
    from bson.objectid import ObjectId
    from pymongo import MongoClient
    from clustergrammer import Network

    # set up database connection
    client = MongoClient(mongo_address)
    db = client.clustergrammer

    viz_id = ObjectId(viz_id)
    # get placeholder viz data
    found_viz = db.networks.find_one({'_id': viz_id})

    # initialize export_dat
    export_dat = {}
    export_viz = {}

    # try to make clustergram using vect_post
    try:
        # ini network obj
        net = Network()

        # vector endpoint
        net.load_vect_post_to_net(vect_post)

        # swap nans for zeros
        net.swap_nan_for_zero()

        # deprecated clustering modules
        ####################################
        # cluster g2e using pandas
        # net.fast_mult_views()
        # # calculate top views rather than percentage views
        # net.N_top_views()
        ####################################

        net.make_filtered_views(dist_type='cosine', dendro=True,
                                views=['N_row_sum'], linkage_type='average')

        # export dat
        try:
            # convert data to list
            net.dat['mat'] = net.dat['mat'].tolist()
            net.dat['mat_up'] = net.dat['mat_up'].tolist()
            net.dat['mat_dn'] = net.dat['mat_dn'].tolist()

            export_dat['dat'] = net.export_net_json('dat')
            export_dat['source'] = 'g2e_enr_vect'
            # Collection.insert was removed in pymongo 4; insert_one
            # returns the new document id via .inserted_id
            dat_id = db.network_data.insert_one(export_dat).inserted_id
            print('G2E: network data successfully uploaded')
        except Exception:
            # best-effort fallback: the matrix (or its up/dn components)
            # could not be stored - save a stub instead
            export_dat['dat'] = 'data-too-large'
            export_dat['source'] = 'g2e_enr_vect'
            dat_id = db.network_data.insert_one(export_dat).inserted_id
            print('G2E: network data too large to be uploaded')

        update_viz = net.viz
        update_dat = dat_id

    # if there is an error update json with error
    except Exception:
        print('\n--------------------------------')
        print('G2E clustering error')
        print('----------------------------------\n')
        update_viz = 'error'
        update_dat = 'error'

    # export viz to database
    found_viz['viz'] = update_viz
    found_viz['dat'] = update_dat

    # update the viz data
    try:
        db.networks.update_one({"_id": viz_id}, {"$set": found_viz})

        print('\n\n---------------------------------------------------')
        print('G2E Successfully made and uploaded clustergram')
        print('---------------------------------------------------\n\n')
    except Exception:
        print('\n--------------------------------')
        print('G2E error in loading viz into database')
        print('----------------------------------\n')

    # close database connection
    client.close()
def main(buff, inst_filename, mongo_address, viz_id):
    """Cluster a user-uploaded TSV buffer and save the viz to MongoDB.

    Parameters
    ----------
    buff : file-like
        Buffer containing the uploaded TSV matrix.
    inst_filename : str
        Original name of the uploaded file, stored with the data.
    mongo_address : str
        MongoDB connection address.
    viz_id : str
        Hex string id of the placeholder ``networks`` document to update.
    """
    # unused imports removed: numpy, flask, request, StringIO
    from bson.objectid import ObjectId
    from pymongo import MongoClient
    from clustergrammer import Network

    ##############################
    # set up database connection
    ##############################
    client = MongoClient(mongo_address)
    db = client.clustergrammer

    # get placeholder viz data
    viz_id = ObjectId(viz_id)
    found_viz = db.networks.find_one({'_id': viz_id})

    try:
        ########################
        # load and cluster
        ########################

        # initiate class network
        net = Network()

        # net.load_lines_from_tsv_to_net(file_lines)
        net.pandas_load_tsv_to_net(buff)

        # swap nans for zero
        net.swap_nan_for_zero()

        # deprecated clustering module
        ####################################
        # # fast mult views takes care of pre-filtering
        # net.fast_mult_views()
        ####################################

        net.make_filtered_views(dist_type='cosine', dendro=True,
                                views=['filter_row_sum'],
                                linkage_type='average')

        ###############################
        # save to database
        ###############################
        export_dat = {}
        export_dat['name'] = inst_filename
        export_dat['dat'] = net.export_net_json('dat')
        export_dat['source'] = 'user_upload'

        # save dat to separate document
        # Collection.insert was removed in pymongo 4; use insert_one
        dat_id = db.network_data.insert_one(export_dat).inserted_id

        update_viz = net.viz
        update_dat = dat_id

    except Exception:
        print('\n-----------------------')
        print('error in clustering')
        print('-----------------------\n')
        update_viz = 'error'
        update_dat = 'error'

    # update found_viz
    found_viz['viz'] = update_viz
    found_viz['dat'] = update_dat

    # update found_viz in database
    db.networks.update_one({'_id': viz_id}, {'$set': found_viz})

    ############################
    # end database connection
    ############################
    client.close()
# Example script: cluster a TSV matrix and export a multi-view JSON,
# reporting how long the whole run took.
import time

start_time = time.time()

# import network class from Network.py
from clustergrammer import Network

net = Network()
net.load_tsv_to_net('txt/example_tsv.txt')

# build the filtered views and write them to disk
net.make_filtered_views(
    dist_type='cos',
    views=['N_row_sum', 'pct_row_sum'],
)
net.write_json_to_file('viz', 'json/mult_view.json', 'indent')

# your code
elapsed_time = time.time() - start_time

print('\n\n\nelapsed time')
print(elapsed_time)