Example #1
def make_viz_from_df(df, filename):
    from clustergrammer import Network

    net = Network()

    net.df_to_dat(df)
    net.swap_nan_for_zero()

    # z-score first so the column distributions are similar
    net.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # filter the rows to keep the perts with the largest normalized values
    net.filter_N_top('row', 2000)

    num_cols = net.dat['mat'].shape[1]

    if num_cols < 50:
        # views = ['N_row_sum', 'N_row_var']
        views = ['N_row_sum']
        net.make_clust(dist_type='cos', views=views)

        filename = 'json/' + filename.split('/')[1].replace('.gct',
                                                            '') + '.json'

        net.write_json_to_file('viz', filename)
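A minimal way to exercise make_viz_from_df, assuming that df is the {'mat': DataFrame} dict that df_to_dat accepts (as in the make_viz_json example further down) and that filename follows the '<dir>/<name>.gct' pattern implied by the split/replace; both the data and the path below are hypothetical:

import numpy as np
import pandas as pd

# hypothetical 10x5 matrix; 'gcts/example.gct' is an assumed input path
mat = pd.DataFrame(np.random.randn(10, 5),
                   index=['row-%d' % i for i in range(10)],
                   columns=['col-%d' % i for i in range(5)])
make_viz_from_df({'mat': mat}, 'gcts/example.gct')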
Example #2
def make_phos_homepage_viz():

    from clustergrammer import Network
    net = Network()

    filename = 'lung_cellline_3_1_16/lung_cellline_phospho/' + \
      'lung_cellline_TMT_phospho_combined_ratios.tsv'

    net.load_file(filename)

    # quantile normalize the columns to make the cell lines comparable
    net.normalize(axis='col', norm_type='qn')

    # keep only the most differentially regulated PTMs
    net.filter_N_top('row', 250, 'sum')

    # take zscore of rows
    net.normalize(axis='row', norm_type='zscore', keep_orig=True)

    net.swap_nan_for_zero()

    # threshold filter PTMs
    net.filter_threshold('row', threshold=1.75, num_occur=3)

    views = ['N_row_sum', 'N_row_var']
    net.make_clust(dist_type='cos',
                   views=views,
                   dendro=True,
                   sim_mat=True,
                   calc_cat_pval=True)

    net.write_json_to_file('viz', 'json/homepage_phos.json', 'indent')
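norm_type='qn' forces every column onto a common distribution. A rough pandas sketch of column quantile normalization, for intuition only (clustergrammer's own implementation may differ, e.g. in tie and NaN handling):

import numpy as np
import pandas as pd

def quantile_normalize(df):
    # rank each column (ties broken by order; assumes no NaNs), then map
    # each rank to the mean of the column-wise sorted values
    ranks = df.rank(method='first').astype(int)
    sorted_mean = pd.Series(np.sort(df.values, axis=0).mean(axis=1))
    return ranks.apply(lambda col: col.map(lambda r: sorted_mean.iloc[r - 1]))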
Example #3
def clust_vect(db, viz_doc, vect_post):

    from clustergrammer import Network

    try:
        net = Network()
        net.load_vect_post_to_net(vect_post)
        net.swap_nan_for_zero()

        views = ['N_row_sum', 'N_row_var']
        net.make_clust(dist_type='cosine',
                       dendro=True,
                       views=views,
                       linkage_type='average')

        dat_id = upload_dat(db, net)

        update_viz = net.viz
        update_dat = dat_id

    except Exception as e:
        print('error clustering: ' + str(e))
        update_viz = 'error'
        update_dat = 'error'

    viz_doc['viz'] = update_viz
    viz_doc['dat'] = update_dat

    return viz_doc
Example #4
def make_viz_json(inst_df, name):
  from clustergrammer import Network
  net = Network()

  filename = 'json/' + name
  load_df = {}
  load_df['mat'] = inst_df
  net.df_to_dat(load_df)
  net.swap_nan_for_zero()
  net.make_clust(views=[])
  net.write_json_to_file('viz', filename, 'no-indent')
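A quick driver for make_viz_json; the DataFrame and output name are hypothetical, and a 'json/' directory is assumed to exist:

import numpy as np
import pandas as pd

inst_df = pd.DataFrame(np.random.randn(6, 4),
                       index=['gene-%d' % i for i in range(6)],
                       columns=['sample-%d' % i for i in range(4)])
make_viz_json(inst_df, 'example_viz.json')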
Example #5
def main(buff, inst_filename, mongo_address, viz_id):
  from bson.objectid import ObjectId
  from pymongo import MongoClient
  from clustergrammer import Network

  client = MongoClient(mongo_address)
  db = client.clustergrammer

  viz_id = ObjectId(viz_id)
  found_viz = db.networks.find_one({'_id': viz_id})

  try:

    net = Network()
    net.load_tsv_to_net(buff)

    net.swap_nan_for_zero()

    views = ['N_row_sum', 'N_row_var']

    net.make_clust(dist_type='cosine', dendro=True, views=views,
                   linkage_type='average')

    export_dat = {}
    export_dat['name'] = inst_filename
    export_dat['dat'] = net.export_net_json('dat')
    export_dat['source'] = 'user_upload'

    # insert_one replaces the Collection.insert removed in pymongo 4
    dat_id = db.network_data.insert_one(export_dat).inserted_id

    update_viz = net.viz
    update_dat = dat_id

  except Exception as e:
    print('\n-----------------------')
    print('error in clustering: ' + str(e))
    print('-----------------------\n')
    update_viz = 'error'
    update_dat = 'error'

  found_viz['viz'] = update_viz
  found_viz['dat'] = update_dat

  db.networks.update_one({'_id': viz_id}, {'$set': found_viz})

  client.close()
Example #6
def main(buff, inst_filename, mongo_address, viz_id):
    from bson.objectid import ObjectId
    from pymongo import MongoClient
    from clustergrammer import Network

    client = MongoClient(mongo_address)
    db = client.clustergrammer

    viz_id = ObjectId(viz_id)
    found_viz = db.networks.find_one({'_id': viz_id})

    try:

        net = Network()
        net.load_tsv_to_net(buff)

        net.swap_nan_for_zero()

        views = ['N_row_sum', 'N_row_var']

        net.make_clust(dist_type='cosine', dendro=True, views=views,
                       linkage_type='average')

        export_dat = {}
        export_dat['name'] = inst_filename
        export_dat['dat'] = net.export_net_json('dat')
        export_dat['source'] = 'user_upload'

        # insert_one replaces the Collection.insert removed in pymongo 4
        dat_id = db.network_data.insert_one(export_dat).inserted_id

        update_viz = net.viz
        update_dat = dat_id

    except Exception as e:
        print('\n-----------------------')
        print('error in clustering: ' + str(e))
        print('-----------------------\n')
        update_viz = 'error'
        update_dat = 'error'

    found_viz['viz'] = update_viz
    found_viz['dat'] = update_dat

    db.networks.update_one({'_id': viz_id}, {'$set': found_viz})

    client.close()
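A hypothetical driver for the Python-3 variant of main() above, assuming a MongoDB instance on localhost and a pre-created document in db.networks for viz_id to point at; the TSV content is made up:

from io import StringIO
from pymongo import MongoClient

# create a placeholder networks document so main() has something to update
client = MongoClient('mongodb://localhost:27017')
viz_id = client.clustergrammer.networks.insert_one({'viz': None, 'dat': None}).inserted_id
client.close()

tsv = '\tcol-1\tcol-2\nrow-1\t1.0\t2.0\nrow-2\t3.0\t4.0\n'
main(StringIO(tsv), 'example.tsv', 'mongodb://localhost:27017', str(viz_id))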
Example #7
def cluster():
  from clustergrammer import Network

  net = Network()

  vect_post = net.load_json_to_dict('fake_vect_post.json')

  net.load_vect_post_to_net(vect_post)

  net.swap_nan_for_zero()

  # net.N_top_views()
  net.make_clust(dist_type='cos', views=['N_row_sum', 'N_row_var'], dendro=True)

  net.write_json_to_file('viz', 'json/large_vect_post_example.json', 'indent')
Example #8
def clustergrammer_load():
    # import network class from Network.py
    from clustergrammer import Network

    net = Network()

    net.pandas_load_file('mat_cats.tsv')

    net.make_clust(dist_type='cos', views=['N_row_sum', 'N_row_var'])

    net.write_json_to_file('viz', 'json/mult_cats.json', 'indent')

    print('\n**********************')
    print(net.dat['node_info']['row'].keys())

    print('\n\n')
Example #9
def clustergrammer_load():
  # import network class from Network.py
  from clustergrammer import Network

  net = Network()

  net.pandas_load_file('mat_cats.tsv')

  net.make_clust(dist_type='cos', views=['N_row_sum', 'N_row_var'])

  net.write_json_to_file('viz', 'json/mult_cats.json', 'indent')

  print('\n**********************')
  print(net.dat['node_info']['row'].keys())

  print('\n\n')
Example #10
def build_layout():
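    # module-level imports assumed by this snippet: load_data (project-local),
    # Network (clustergrammer), dcc and html (dash), dbc (dash_bootstrap_components),
    # dash_clustergrammer, and defaultdict (collections)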
    df = load_data.load_gsva_compare_cluster('hallmark')

    # TODO: clean up this cancer-type mapping
    cat_to_true = defaultdict(list)
    for clust in df.index:
        if 'PJ030' in clust:
            cat_to_true['LGG'].append(clust)
        elif 'PJ' in clust:
            cat_to_true['GBM'].append(clust)
        elif 'LX' in clust:
            cat_to_true['LUAD'].append(clust)
        elif 'GSE146026' in clust:
            cat_to_true['OV'].append(clust)
        elif 'GSE72056' in clust:
            cat_to_true['SKCM'].append(clust)
        elif 'GSE103322' in clust:
            cat_to_true['HNSC'].append(clust)
        elif 'GSE111672' in clust:
            cat_to_true['PAAD'].append(clust)

    cats = [{
        'title': 'Cancer Type',
        'cats': dict(cat_to_true)
    }]

    net = Network()
    net.load_df(df)
    net.add_cats('row', cats)
    net.make_clust()

    layout = dcc.Tab(label='Cluster Comparison',
                     children=[
                         dbc.Container(
                             fluid=True,
                             children=[
                                 html.Link(rel='stylesheet',
                                           href='./static/custom.css'),
                                 dash_clustergrammer.DashClustergrammer(
                                     id='cgram-component',
                                     label='',
                                     network_data=net.viz)
                             ])
                     ])
    return layout
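A sketch of embedding the returned tab in a Dash app (dash 2.x assumed); build_layout() returns a dcc.Tab, so it is wrapped in dcc.Tabs before becoming the page layout:

import dash
from dash import dcc

app = dash.Dash(__name__)
app.layout = dcc.Tabs(children=[build_layout()])

if __name__ == '__main__':
    app.run_server(debug=True)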
Example #11
def cluster():
    from clustergrammer import Network

    net = Network()

    vect_post = net.load_json_to_dict('fake_vect_post.json')

    net.load_vect_post_to_net(vect_post)

    net.swap_nan_for_zero()

    # net.N_top_views()
    net.make_clust(dist_type='cos',
                   views=['N_row_sum', 'N_row_var'],
                   dendro=True)

    net.write_json_to_file('viz', 'json/large_vect_post_example.json',
                           'indent')
Example #12
def make_exp_homepage_viz():

    from clustergrammer import Network
    net = Network()

    net.load_file('CCLE_gene_expression/CCLE_NSCLC_all_genes.txt')

    # threshold filter expression
    net.filter_threshold('row', threshold=3.0, num_occur=4)

    views = ['N_row_sum', 'N_row_var']
    net.make_clust(dist_type='cos',
                   views=views,
                   dendro=True,
                   sim_mat=True,
                   calc_cat_pval=False)

    net.write_json_to_file('viz', 'json/homepage_exp.json', 'indent')
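For intuition, filter_threshold('row', threshold=3.0, num_occur=4) keeps rows with enough extreme values. A rough pandas equivalent, under the assumption that values are compared by absolute magnitude (check the clustergrammer source for the exact rule):

import pandas as pd

def filter_threshold_rows(df, threshold=3.0, num_occur=4):
    # keep rows having at least num_occur entries with |value| > threshold
    keep = (df.abs() > threshold).sum(axis=1) >= num_occur
    return df.loc[keep]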
Example #13
def main():

  import time
  start_time = time.time()
  from io import StringIO

  # import network class from Network.py
  from clustergrammer import Network

  net = Network()

  # load data into the network
  # net.load_tsv_to_net('txt/example_tsv_network.txt')
  # net.load_tsv_to_net('txt/mat_1mb.txt')

  # choose file
  ################
  # file_buffer = open('txt/col_categories.txt')
  file_buffer = open('txt/example_tsv_network.txt')

  buff = StringIO(file_buffer.read())
  file_buffer.close()
  net.pandas_load_tsv_to_net(buff)

  # views to build: filter rows by sum and keep top-N rows by sum
  views = ['filter_row_sum', 'N_row_sum']

  # distance metric
  dist_type = 'cosine'

  # linkage type
  linkage_type = 'average'

  net.make_clust(dist_type=dist_type, views=views, calc_col_cats=True,
                 linkage_type=linkage_type)

  net.write_json_to_file('viz', 'json/mult_view.json', 'no-indent')

  elapsed_time = time.time() - start_time
  print('\n\n\nelapsed time: ' + str(elapsed_time))
Example #14
def main():

    import time
    start_time = time.time()
    from io import StringIO

    # import network class from Network.py
    from clustergrammer import Network

    net = Network()

    # load data to dataframe
    # net.load_tsv_to_net('txt/example_tsv_network.txt')
    # net.load_tsv_to_net('txt/mat_1mb.txt')

    # choose file
    ################
    # file_buffer = open('txt/col_categories.txt')
    file_buffer = open('txt/example_tsv_network.txt')

    buff = StringIO(file_buffer.read())
    file_buffer.close()
    net.pandas_load_tsv_to_net(buff)

    # views to build: filter rows by sum and keep top-N rows by sum
    views = ['filter_row_sum', 'N_row_sum']

    # distance metric
    dist_type = 'cosine'

    # linkage type
    linkage_type = 'average'

    net.make_clust(dist_type=dist_type, views=views, calc_col_cats=True,
                   linkage_type=linkage_type)

    net.write_json_to_file('viz', 'json/mult_view.json', 'no-indent')

    elapsed_time = time.time() - start_time
    print('\n\n\nelapsed time: ' + str(elapsed_time))
Example #15
def make_json_from_tsv(name):
  '''
  make a clustergrammer json from a tsv file
  '''
  from clustergrammer import Network

  print('\n' + name)

  net = Network()

  filename = 'txt/' + name + '.txt'

  net.load_file(filename)

  net.swap_nan_for_zero()

  # z-score first so the column distributions are similar
  net.normalize(axis='col', norm_type='zscore', keep_orig=True)

  # filter the rows to keep the perts with the largest normalized values
  net.filter_N_top('row', 1000)

  num_rows = net.dat['mat'].shape[0]
  num_cols = net.dat['mat'].shape[1]

  print('num_rows ' + str(num_rows))
  print('num_cols ' + str(num_cols))

  if num_cols < 50 or num_rows < 1000:

    views = ['N_row_sum']
    net.make_clust(dist_type='cos', views=views)
    export_filename = 'json/' + name + '.json'
    net.write_json_to_file('viz', export_filename)

  else:
    print('did not cluster: too many columns and rows')
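A one-line driver, assuming txt/rc_two_cats.txt (used by other examples here) exists, since the function reads 'txt/' + name + '.txt':

make_json_from_tsv('rc_two_cats')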
Example #16
def make_viz_from_df(df, filename):
  from clustergrammer import Network

  net = Network()

  net.df_to_dat(df)
  net.swap_nan_for_zero()

  # z-score first so the column distributions are similar
  net.normalize(axis='col', norm_type='zscore', keep_orig=True)

  # filter the rows to keep the perts with the largest normalized values
  net.filter_N_top('row', 2000)

  num_cols = net.dat['mat'].shape[1]

  if num_cols < 50:
    # views = ['N_row_sum', 'N_row_var']
    views = ['N_row_sum']
    net.make_clust(dist_type='cos', views=views)

    filename = 'json/' + filename.split('/')[1].replace('.gct', '') + '.json'

    net.write_json_to_file('viz', filename)
Example #17
from clustergrammer import Network
net = Network()

# load matrix tsv file
net.load_stdin()

# optional filtering and normalization
##########################################
# net.filter_sum('row', threshold=20)
# net.normalize(axis='col', norm_type='zscore', keep_orig=True)
# net.filter_N_top('row', 250, rank_type='sum')
# net.filter_threshold('row', threshold=3.0, num_occur=4)
# net.swap_nan_for_zero()

net.make_clust(dist_type='cos',
               views=['N_row_sum', 'N_row_var'],
               dendro=True,
               sim_mat=False,
               filter_sim=0.1,
               calc_cat_pval=False)

# output JSON for the front-end visualization
print(net.export_net_json('viz', 'no-indent'))
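This variant reads the matrix from standard input and writes the viz JSON to standard output, so it can sit in a shell pipeline; a hypothetical invocation would be python clustergrammer_from_stdin.py < txt/rc_two_cats.txt > json/viz.json (script and file names assumed).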
Example #18
import time
start_time = time.time()

from clustergrammer import Network
net = Network()

net.load_file('txt/rc_two_cats.txt')
# net.load_file('txt/tmp.txt')

views = ['N_row_sum', 'N_row_var']

net.make_clust(dist_type='cos', views=views, dendro=True, sim_mat=True)

net.write_json_to_file('viz', 'json/mult_view.json')
net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json')
net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json')

elapsed_time = time.time() - start_time

print('\n\nelapsed time')
print(elapsed_time)
Example #19
		# Remove names for clustergrammer
		gene_attribute_matrix.index.name = ""
		gene_attribute_matrix.columns.name = ""
		# Write to file
		# fp = StringIO()
		# gene_attribute_matrix.to_csv(fp, sep='\t')
		gene_attribute_matrix.to_csv('tmp.txt', sep='\t')

		# Clustergrammer
		from clustergrammer import Network
		net = Network()
		# net.load_tsv_to_net(fp, name) # StringIO
		net.load_file('tmp.txt')
		net.swap_nan_for_zero()
		# Generate
		net.make_clust(dist_type='cos', views=['N_row_sum', 'N_row_var'], dendro=True,
					   sim_mat=True, filter_sim=0.1, calc_cat_pval=False)

		# Insert into database
		cur.execute('insert into `datasets` (`Name`, `prot_att`, `att_att`, `prot_prot`) values (?, ?, ?, ?)',
			(name,
			 net.export_net_json('viz', indent='no-indent'),
			 net.export_net_json('sim_col', indent='no-indent'),
			 net.export_net_json('sim_row', indent='no-indent')))
		con.commit()
	except Exception as e:
		print("Couldn't process %s (%s)" % (name, e))
		continue
	print("Processed %s" % name)
con.close()
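The snippet assumes a datasets table already exists; a hypothetical setup consistent with the insert above (sqlite3 assumed from the ? placeholders, and the column types are guesses):

import sqlite3

con = sqlite3.connect('datasets.db')  # assumed database file
cur = con.cursor()
cur.execute('''create table if not exists `datasets` (
    `Name` text, `prot_att` text, `att_att` text, `prot_prot` text)''')
con.commit()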
Example #20
def clust_from_response(response_list):
    from clustergrammer import Network
    import numpy as np
    import pandas as pd
    import math
    from copy import deepcopy

    # print('----------------------')
    # print('enrichr_clust_from_response')
    # print('----------------------')

    ini_enr = transfer_to_enr_dict(response_list)

    enr = []
    scores = {}
    score_types = ['combined_score', 'pval', 'zscore']

    for score_type in score_types:
        # dtype given explicitly; an empty Series with no dtype is deprecated
        scores[score_type] = pd.Series(dtype=float)

    for inst_enr in ini_enr:
        if inst_enr['combined_score'] > 0:

            # make series of enriched terms with scores
            for score_type in score_types:

                # collect the scores of the enriched terms
                if score_type == 'combined_score':
                    scores[score_type][inst_enr['name']] = inst_enr[score_type]
                if score_type == 'pval':
                    scores[score_type][inst_enr['name']] = -math.log(
                        inst_enr[score_type])
                if score_type == 'zscore':
                    scores[score_type][
                        inst_enr['name']] = -inst_enr[score_type]

            # keep enrichment values
            enr.append(inst_enr)

    # sort and normalize the scores
    for score_type in score_types:
        scores[score_type] = scores[score_type] / scores[score_type].max()
        scores[score_type] = scores[score_type].sort_values(ascending=False)

    number_of_enriched_terms = len(scores['combined_score'])

    enr_score_types = ['combined_score', 'pval', 'zscore']

    if number_of_enriched_terms < 10:
        num_dict = {'ten': 10}
    elif number_of_enriched_terms < 20:
        num_dict = {'ten': 10, 'twenty': 20}
    else:
        num_dict = {'ten': 10, 'twenty': 20, 'thirty': 30}

    # gather lists of top scores
    top_terms = {}
    for enr_type in enr_score_types:
        top_terms[enr_type] = {}
        for num_terms in list(num_dict.keys()):
            inst_num = num_dict[num_terms]
            top_terms[enr_type][num_terms] = scores[enr_type].index.tolist()[:inst_num]

    # gather the terms that should be kept - they are at the top of the score list
    keep_terms = []
    for inst_enr_score in top_terms:
        for tmp_num in list(num_dict.keys()):
            keep_terms.extend(top_terms[inst_enr_score][tmp_num])

    keep_terms = list(set(keep_terms))

    # keep enriched terms that are at the top 10 based on at least one score
    keep_enr = []
    for inst_enr in enr:
        if inst_enr['name'] in keep_terms:
            keep_enr.append(inst_enr)

    # fill in full matrix
    #######################

    # genes
    row_node_names = []
    # enriched terms
    col_node_names = []

    # gather information from the list of enriched terms
    for inst_enr in keep_enr:
        col_node_names.append(inst_enr['name'])
        row_node_names.extend(inst_enr['int_genes'])

    row_node_names = sorted(list(set(row_node_names)))

    net = Network()
    net.dat['nodes']['row'] = row_node_names
    net.dat['nodes']['col'] = col_node_names
    net.dat['mat'] = np.zeros([len(row_node_names), len(col_node_names)])

    for inst_enr in keep_enr:

        inst_term = inst_enr['name']
        col_index = col_node_names.index(inst_term)

        # use combined score for full matrix - will not be seen in viz
        tmp_score = scores['combined_score'][inst_term]
        net.dat['node_info']['col']['value'].append(tmp_score)

        for inst_gene in inst_enr['int_genes']:
            row_index = row_node_names.index(inst_gene)

            # save association
            net.dat['mat'][row_index, col_index] = 1

    # cluster full matrix
    #############################
    # do not make multiple views
    views = ['']

    if len(net.dat['nodes']['row']) > 1:
        net.make_clust(dist_type='jaccard', views=views, dendro=False)
    else:
        net.make_clust(dist_type='jaccard',
                       views=views,
                       dendro=False,
                       run_clustering=False)

    # get dataframe from full matrix
    df = net.dat_to_df()

    for score_type in score_types:

        for num_terms in num_dict:

            inst_df = deepcopy(df)
            # a fresh Network(); deepcopy of a brand-new object was redundant
            inst_net = Network()

            inst_df['mat'] = inst_df['mat'][top_terms[score_type][num_terms]]

            # load back into net
            inst_net.df_to_dat(inst_df)

            # make views
            if len(net.dat['nodes']['row']) > 1:
                inst_net.make_clust(dist_type='jaccard',
                                    views=['N_row_sum'],
                                    dendro=False)
            else:
                inst_net.make_clust(dist_type='jaccard',
                                    views=['N_row_sum'],
                                    dendro=False,
                                    run_clustering=False)

            inst_views = inst_net.viz['views']

            # add score_type to views
            for inst_view in inst_views:

                inst_view['N_col_sum'] = num_dict[num_terms]

                inst_view['enr_score_type'] = score_type

                # add values to col_nodes and order according to rank
                for inst_col in inst_view['nodes']['col_nodes']:

                    inst_col['rank'] = (
                        len(top_terms[score_type][num_terms]) -
                        top_terms[score_type][num_terms].index(inst_col['name']))

                    inst_name = inst_col['name']
                    inst_col['value'] = scores[score_type][inst_name]

            # add views to main network
            net.viz['views'].extend(inst_views)

    return net
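clust_from_response leans on the undefined helper transfer_to_enr_dict; from the keys read above, each record it yields needs at least this shape (a reconstruction for illustration, not the actual Enrichr payload):

ini_enr_record = {
    'name': 'term-A',               # enriched term
    'combined_score': 12.3,         # must be > 0 to be kept
    'pval': 0.001,                  # stored as -log(pval)
    'zscore': -1.8,                 # stored negated
    'int_genes': ['EGFR', 'TP53'],  # intersecting genes become matrix rows
}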
Example #21
net.load_file('txt/rc_two_cats.txt')
# net.load_file('txt/example_tsv.txt')
# net.load_file('txt/col_categories.txt')
# net.load_file('txt/mat_cats.tsv')
# net.load_file('txt/mat_1mb.Txt')
# net.load_file('txt/mnist.txt')
# net.load_file('txt/sim_mat_4_cats.txt')

views = ['N_row_sum', 'N_row_var']

# # filtering rows and cols by sum
# net.filter_sum('row', threshold=20)
# net.filter_sum('col', threshold=30)

# # keep top rows based on sum
# net.filter_N_top('row', 10, 'sum')

net.make_clust(dist_type='cos', views=views, dendro=True,
               sim_mat=True, filter_sim=0.1)

# net.produce_view({'N_row_sum':10,'dist':'euclidean'})

net.write_json_to_file('viz', 'json/mult_view.json', 'no-indent')
net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent')
net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json', 'no-indent')

elapsed_time = time.time() - start_time

print('\n\nelapsed time')
print(elapsed_time)
Example #22
def clust_from_response(response_list):
  from clustergrammer import Network
  import numpy as np
  import pandas as pd
  import math
  from copy import deepcopy

  print('----------------------')
  print('enrichr_clust_from_response')
  print('----------------------')

  ini_enr = transfer_to_enr_dict(response_list)

  enr = []
  scores = {}
  score_types = ['combined_score', 'pval', 'zscore']

  for score_type in score_types:
    scores[score_type] = pd.Series(dtype=float)

  for inst_enr in ini_enr:
    if inst_enr['combined_score'] > 0:

      # make series of enriched terms with scores
      for score_type in score_types:

        # collect the scores of the enriched terms
        if score_type == 'combined_score':
          scores[score_type][inst_enr['name']] = inst_enr[score_type]
        if score_type == 'pval':
          scores[score_type][inst_enr['name']] = -math.log(inst_enr[score_type])
        if score_type == 'zscore':
          scores[score_type][inst_enr['name']] = -inst_enr[score_type]

      # keep enrichment values
      enr.append(inst_enr)

  # sort and normalize the scores
  for score_type in score_types:
    scores[score_type] = scores[score_type] / scores[score_type].max()
    scores[score_type] = scores[score_type].sort_values(ascending=False)

  number_of_enriched_terms = len(scores['combined_score'])

  enr_score_types = ['combined_score', 'pval', 'zscore']

  if number_of_enriched_terms < 10:
    num_dict = {'ten': 10}
  elif number_of_enriched_terms < 20:
    num_dict = {'ten': 10, 'twenty': 20}
  else:
    num_dict = {'ten': 10, 'twenty': 20, 'thirty': 30}

  # gather lists of top scores
  top_terms = {}
  for enr_type in enr_score_types:
    top_terms[enr_type] = {}
    for num_terms in num_dict.keys():
      inst_num = num_dict[num_terms]
      top_terms[enr_type][num_terms] = scores[enr_type].index.tolist()[:inst_num]

  # gather the terms that should be kept - they are at the top of the score list
  keep_terms = []
  for inst_enr_score in top_terms:
    for tmp_num in num_dict.keys():
      keep_terms.extend(top_terms[inst_enr_score][tmp_num])

  keep_terms = list(set(keep_terms))

  # keep enriched terms that are at the top 10 based on at least one score
  keep_enr = []
  for inst_enr in enr:
    if inst_enr['name'] in keep_terms:
      keep_enr.append(inst_enr)


  # fill in full matrix
  #######################

  # genes
  row_node_names = []
  # enriched terms
  col_node_names = []

  # gather information from the list of enriched terms
  for inst_enr in keep_enr:
    col_node_names.append(inst_enr['name'])
    row_node_names.extend(inst_enr['int_genes'])

  row_node_names = sorted(list(set(row_node_names)))

  net = Network()
  net.dat['nodes']['row'] = row_node_names
  net.dat['nodes']['col'] = col_node_names
  net.dat['mat'] = np.zeros([len(row_node_names), len(col_node_names)])

  for inst_enr in keep_enr:

    inst_term = inst_enr['name']
    col_index = col_node_names.index(inst_term)

    # use combined score for full matrix - will not be seen in viz
    tmp_score = scores['combined_score'][inst_term]
    net.dat['node_info']['col']['value'].append(tmp_score)

    for inst_gene in inst_enr['int_genes']:
      row_index = row_node_names.index(inst_gene)

      # save association
      net.dat['mat'][row_index, col_index] = 1

  # cluster full matrix
  #############################
  # do not make multiple views
  views = ['']

  if len(net.dat['nodes']['row']) > 1:
    net.make_clust(dist_type='jaccard', views=views, dendro=False)
  else:
    net.make_clust(dist_type='jaccard', views=views, dendro=False, run_clustering=False)

  # get dataframe from full matrix
  df = net.dat_to_df()

  for score_type in score_types:

    for num_terms in num_dict:

      inst_df = deepcopy(df)
      # a fresh Network(); deepcopy of a brand-new object was redundant
      inst_net = Network()

      inst_df['mat'] = inst_df['mat'][top_terms[score_type][num_terms]]

      # load back into net
      inst_net.df_to_dat(inst_df)

      # make views
      if len(net.dat['nodes']['row']) > 1:
        inst_net.make_clust(dist_type='jaccard', views=['N_row_sum'], dendro=False)
      else:
        inst_net.make_clust(dist_type='jaccard', views=['N_row_sum'], dendro=False, run_clustering=False)

      inst_views = inst_net.viz['views']

      # add score_type to views
      for inst_view in inst_views:

        inst_view['N_col_sum'] = num_dict[num_terms]

        inst_view['enr_score_type'] = score_type

        # add values to col_nodes and order according to rank
        for inst_col in inst_view['nodes']['col_nodes']:

          inst_col['rank'] = len(top_terms[score_type][num_terms]) - top_terms[score_type][num_terms].index(inst_col['name'])

          inst_name = inst_col['name']
          inst_col['value'] = scores[score_type][inst_name]

      # add views to main network
      net.viz['views'].extend(inst_views)

  return net
Example #23
# net.normalize(axis='row', norm_type='qn')
# net.normalize(axis='col', norm_type='zscore', keep_orig=True)

# net.filter_N_top('row', 100, rank_type='var')
# net.filter_N_top('col', 3, rank_type='var')

# net.filter_threshold('col', threshold=2, num_occur=3)
# net.filter_threshold('row', threshold=3.0, num_occur=4)

net.swap_nan_for_zero()

# df = net.dat_to_df()

views = ['N_row_sum', 'N_row_var']

net.make_clust(dist_type='cos', views=views, dendro=True,
               sim_mat=True, filter_sim=0.1, calc_cat_pval=False)

# other optional make_clust arguments seen in this project:
# run_enrichr=['ChEA_2015']
# run_enrichr=['ENCODE_TF_ChIP-seq_2014']
# run_enrichr=['KEA_2015']
# run_enrichr=['GO_Biological_Process_2015']

net.write_json_to_file('viz', 'json/mult_view.json', 'no-indent')
net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent')
net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json', 'no-indent')

# net.write_matrix_to_tsv('txt/export_tmp.txt')

elapsed_time = time.time() - start_time
print('\n\nelapsed time: ' + str(elapsed_time))
Example #24
def genNetworkFromMatrix(matr):
    from clustergrammer import Network
    net = Network()
    # net.load_file('txt/example.txt')
    net.load_file_as_string(matr)
    net.make_clust(run_clustering=False, dendro=False, views=[])
    return net.export_net_json('viz', 'no-indent')
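A minimal driver; load_file_as_string() takes the file's contents rather than a path, and 'txt/example.txt' here is an assumed tab-separated matrix:

with open('txt/example.txt') as f:
    viz_json = genNetworkFromMatrix(f.read())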
Example #25
# net.load_file('txt/mat_cats.tsv')
# net.load_file('txt/mat_1mb.Txt')
# net.load_file('txt/mnist.txt')
# net.load_file('txt/sim_mat_4_cats.txt')

views = ['N_row_sum', 'N_row_var']

# # filtering rows and cols by sum
# net.filter_sum('row', threshold=20)
# net.filter_sum('col', threshold=30)

# # keep top rows based on sum
# net.filter_N_top('row', 10, 'sum')

net.make_clust(dist_type='cos',
               views=views,
               dendro=True,
               sim_mat=True,
               filter_sim=0.1)

# net.produce_view({'N_row_sum':10,'dist':'euclidean'})

net.write_json_to_file('viz', 'json/mult_view.json', 'no-indent')
net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent')
net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json', 'no-indent')

elapsed_time = time.time() - start_time

print('\n\nelapsed time')
print(elapsed_time)
Example #26
import time
start_time = time.time()

from clustergrammer import Network
net = Network()

net.load_file('txt/rc_two_cats.txt')
# net.load_file('txt/tmp.txt')

views = ['N_row_sum', 'N_row_var']

net.make_clust(dist_type='cos', views=views, dendro=True, sim_mat=True)

net.write_json_to_file('viz', 'json/mult_view.json')
net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json')
net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json')

elapsed_time = time.time() - start_time

print('\n\nelapsed time')
print(elapsed_time)
####################
inst_name = 'Tyrosine'
# net.load_file('txt/phos_ratios_all_treat_no_geld_ST.txt')
net.load_file('txt/phos_ratios_all_treat_no_geld_Tyrosine.txt')

net.swap_nan_for_zero()

# net.normalize(axis='row', norm_type='zscore', keep_orig=True)

print(net.dat.keys())

views = ['N_row_sum', 'N_row_var']

net.make_clust(dist_type='cos',
               views=views,
               dendro=True,
               sim_mat=True,
               filter_sim=0.1,
               calc_cat_pval=False)
# run_enrichr=['KEA_2015'])
# run_enrichr=['ENCODE_TF_ChIP-seq_2014'])
# run_enrichr=['GO_Biological_Process_2015'])

net.write_json_to_file('viz', 'json/' + inst_name + '.json', 'no-indent')
net.write_json_to_file('sim_row', 'json/' + inst_name + '_sim_row.json',
                       'no-indent')
net.write_json_to_file('sim_col', 'json/' + inst_name + '_sim_col.json',
                       'no-indent')

elapsed_time = time.time() - start_time
print('\n\nelapsed time: ' + str(elapsed_time))