def generate_subsampled_datasets():
    '''
    Generate subsampled TSVs from the MNIST dataset.
    '''
    from clustergrammer import Network

    net = Network()
    # load full MNIST data with row labels
    net.load_file('processed_MNIST/large_files/MNIST_row_labels.txt')
    tmp_df = net.dat_to_df()
    df = tmp_df['mat']

    all_sample_nums = [20, 100, 200, 300, 400, 500, 1000]
    sample_repeats = 5

    for sample_num in all_sample_nums:

        df_subs = take_multiple_subsamples(df, sample_num, sample_repeats)

        for inst_subsample in df_subs:
            inst_df = df_subs[inst_subsample]

            inst_df = add_MNIST_cats(inst_df, row_cats=False)

            inst_filename = 'processed_MNIST/random_subsampling/MNIST_' \
                            +str(sample_num)+'x_random_subsample_'+str(inst_subsample)+'.txt'

            print(inst_df.shape)
            inst_df.to_csv(inst_filename, sep='\t')
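
# take_multiple_subsamples and add_MNIST_cats are local helpers not shown in
# this example. A minimal sketch of take_multiple_subsamples, assuming it
# returns a dict mapping repeat indices to random column subsamples (the
# body is hypothetical, only the signature comes from the call above):
def take_multiple_subsamples(df, sample_num, sample_repeats):
    df_subs = {}
    for inst_rep in range(sample_repeats):
        # MNIST instances are stored as columns, so sample columns
        df_subs[inst_rep] = df.sample(n=sample_num, axis=1)
    return df_subs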
Example No. 2
def make_phos_homepage_viz():

    from clustergrammer import Network
    net = Network()

    filename = 'lung_cellline_3_1_16/lung_cellline_phospho/' + \
      'lung_cellline_TMT_phospho_combined_ratios.tsv'

    net.load_file(filename)

    # quantile normalize to make cell line distributions comparable
    net.normalize(axis='col', norm_type='qn')

    # only keep most differentially regulated PTMs
    net.filter_N_top('row', 250, 'sum')

    # take zscore of rows
    net.normalize(axis='row', norm_type='zscore', keep_orig=True)

    net.swap_nan_for_zero()

    # threshold filter PTMs
    net.filter_threshold('row', threshold=1.75, num_occur=3)

    views = ['N_row_sum', 'N_row_var']
    net.make_clust(dist_type='cos',
                   views=views,
                   dendro=True,
                   sim_mat=True,
                   calc_cat_pval=True)

    net.write_json_to_file('viz', 'json/homepage_phos.json', 'indent')
def main():
  from clustergrammer import Network

  net = Network()

  net.load_file('txt/rc_two_cats.txt')

  tmp_size = 50

  inst_dm = make_distance_matrix(net, tmp_size)

  randomly_sample_rows(net, inst_dm, tmp_size)
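
# make_distance_matrix and randomly_sample_rows are local helpers not shown
# in these examples. A minimal sketch of make_distance_matrix, assuming it
# computes a pairwise cosine distance matrix over the first tmp_size rows
# (the behavior is inferred, not from the original source):
from scipy.spatial.distance import pdist, squareform

def make_distance_matrix(net, tmp_size):
    df = net.export_df()
    # condensed pairwise distances between the first tmp_size rows
    dist_vect = pdist(df.values[:tmp_size, :], metric='cosine')
    return squareform(dist_vect)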
def make_plex_matrix():
  '''
  Make a cell line matrix with plex rows and cell line columns.
  This will be used as a negative control that should show worsening correlation
  as data is normalized/filtered.
  '''
  import numpy as np
  import pandas as pd
  from clustergrammer import Network

  # load cl_info
  net = Network()
  cl_info = net.load_json_to_dict('../cell_line_info/cell_line_info_dict.json')

  # load cell line expression
  net.load_file('../CCLE_gene_expression/CCLE_NSCLC_all_genes.txt')
  tmp_df = net.dat_to_df()
  df = tmp_df['mat']

  cols = df.columns.tolist()

  rows = list(range(1, 10))
  print(rows)

  mat = np.zeros((len(rows), len(cols)))

  for inst_col in cols:

    for inst_cl in cl_info:

      if inst_col in inst_cl:
        inst_plex = int(cl_info[inst_cl]['Plex'])

        if inst_plex != -1:
          # print(inst_col + ' in ' + inst_cl + ': ' + str(inst_plex))

          row_index = rows.index(inst_plex)
          col_index = cols.index(inst_col)

          mat[row_index, col_index] = 1


  df_plex = pd.DataFrame(data=mat, columns=cols, index=rows)

  filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/' + \
            'exp-plex.txt'
  df_plex.to_csv(filename, sep='\t')
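
# The structure of cell_line_info_dict.json is assumed to look roughly like
# {'H2170_LUNG': {'Plex': '3', ...}, ...}: cell-line keys mapping to metadata
# dicts with a 'Plex' field (illustrative, inferred from the lookups above).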
Example No. 6
def make_plex_matrix():
    '''
    Make a cell line matrix with plex rows and cell line columns.
    This will be used as a negative control that should show worsening correlation
    as data is normalized/filtered.
    '''
    import numpy as np
    import pandas as pd
    from clustergrammer import Network

    # load cl_info
    net = Network()
    cl_info = net.load_json_to_dict(
        '../cell_line_info/cell_line_info_dict.json')

    # load cell line expression
    net.load_file('../CCLE_gene_expression/CCLE_NSCLC_all_genes.txt')
    tmp_df = net.dat_to_df()
    df = tmp_df['mat']

    cols = df.columns.tolist()

    rows = list(range(1, 10))
    print(rows)

    mat = np.zeros((len(rows), len(cols)))

    for inst_col in cols:

        for inst_cl in cl_info:

            if inst_col in inst_cl:
                inst_plex = int(cl_info[inst_cl]['Plex'])

                if inst_plex != -1:
                    # print(inst_col + ' in ' + inst_cl + ': ' + str(inst_plex))

                    row_index = rows.index(inst_plex)
                    col_index = cols.index(inst_col)

                    mat[row_index, col_index] = 1

    df_plex = pd.DataFrame(data=mat, columns=cols, index=rows)

    filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/' + \
              'exp-plex.txt'
    df_plex.to_csv(filename, sep='\t')
Example No. 7
def main():
  import numpy as np
  import pandas as pd
  from clustergrammer import Network

  rtk_list = load_rtks()

  net = Network()
  net.load_file('txt/tmp_cst_drug_treat_cl.txt')
  df_dict = net.dat_to_df()

  inst_df = df_dict['mat']

  # .ix is deprecated; use .loc for label-based row selection
  inst_df = inst_df.loc[rtk_list]

  inst_df.to_csv('txt/RTK_exp_in_drug_treat_cl.txt', sep='\t')
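
# load_rtks is a local helper not shown here. A minimal sketch, assuming it
# reads receptor tyrosine kinase gene names from a text file (the path is
# illustrative):
def load_rtks():
    with open('txt/rtk_list.txt', 'r') as f:
        return [line.strip() for line in f if line.strip()]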
Example No. 8
def make_exp_homepage_viz():

    from clustergrammer import Network
    net = Network()

    net.load_file('CCLE_gene_expression/CCLE_NSCLC_all_genes.txt')

    # threshold filter expression
    net.filter_threshold('row', threshold=3.0, num_occur=4)

    views = ['N_row_sum', 'N_row_var']
    net.make_clust(dist_type='cos',
                   views=views,
                   dendro=True,
                   sim_mat=True,
                   calc_cat_pval=False)

    net.write_json_to_file('viz', 'json/homepage_exp.json', 'indent')
Example No. 9
def prepare_heatmap(matrix_input, html_file, html_dir, tools_dir, categories, distance, linkage):
    # prepare directory and html
    import os
    # Environment/FileSystemLoader come from jinja2 in this usage (assumed)
    from jinja2 import Environment, FileSystemLoader
    from clustergrammer import Network

    os.mkdir(html_dir)

    env = Environment(loader=FileSystemLoader(tools_dir + "/templates"))
    template = env.get_template("clustergrammer.template")
    overview = template.render()
    with open(html_file, "w") as outf:
        outf.write(overview)

    json_output = html_dir + "/mult_view.json"

    net = Network()
    net.load_file(matrix_input)
    if (categories['row']):
        net.add_cats('row', categories['row'])
    if (categories['col']):
        net.add_cats('col', categories['col'])
    net.cluster(dist_type=distance, linkage_type=linkage)
    net.write_json_to_file('viz', json_output)
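
# A hypothetical call to prepare_heatmap (paths and categories are
# illustrative, not from the original script):
#
# prepare_heatmap('mat.tsv', 'out/heatmap.html', 'out', '/opt/tools',
#                 {'row': [], 'col': []}, 'cos', 'average')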
Example No. 10
def main():

  from copy import deepcopy
  from clustergrammer import Network
  import hzome_to_df  # local helper module used to load hzome-format data

  net = Network()

  # load genes of interest
  gene_info = net.load_json_to_dict('../grant_pois/gene_info_with_dark.json')

  # ENCODE, GTEx, etc
  # hzome_names = ['my_CCLE_exp.txt', 'ENCODE_TF_targets.txt', 'ChEA_TF_targets.txt']
  hzome_names = ['ENCODE_TF_targets.txt']

  # define separate sim_cutoffs for different files
  cutoffs = {}
  cutoffs['my_CCLE_exp.txt'] = 0.15
  cutoffs['ENCODE_TF_targets.txt'] = 0.35 ## 0.6
  cutoffs['ChEA_TF_targets.txt'] = 0.2
  cutoffs['my_gtex_Moshe_2017_exp.txt'] = 0.2

  genes_of_class = gene_info['KIN']['all']

  for hzome_name in hzome_names:

    hzome_filename = '../hzome_data/' + hzome_name

    print('loading data ')

    # load hzome data
    ####################
    if 'my_' in hzome_name:
      # if I am providing the data, then load in normal way
      net.load_file(hzome_filename)
      hzome_data = net.export_df()
    else:
      # load data in hzome format
      hzome_data = deepcopy(hzome_to_df.load_matrix(hzome_filename))

    print('data loaded\n')

    for gene_class in gene_info:
      calc_gene_sim_mat(hzome_data, net, gene_info, gene_class, hzome_name, cutoffs)
def make_json_from_tsv(name):
  '''
  make a clustergrammer json from a tsv file
  '''
  from clustergrammer import Network

  print('\n' + name)

  net = Network()

  filename = 'txt/'+ name + '.txt'

  net.load_file(filename)

  df = net.dat_to_df()

  net.swap_nan_for_zero()

  # Z-score first so that the column distributions are similar
  net.normalize(axis='col', norm_type='zscore', keep_orig=True)

  # filter the rows to keep the perturbations with the largest normalized values
  net.filter_N_top('row', 1000)

  num_rows = net.dat['mat'].shape[0]
  num_cols = net.dat['mat'].shape[1]

  print('num_rows ' + str(num_rows))
  print('num_cols ' + str(num_cols))

  if num_cols < 50 or num_rows < 1000:

    views = ['N_row_sum']
    net.make_clust(dist_type='cos', views=views)
    export_filename = 'json/' + name + '.json'
    net.write_json_to_file('viz', export_filename)

  else:
    print('did not cluster: too many columns and rows')
def equal_digit_sampling_MNIST():
    '''
    Sample N instances of each digit from the MNIST dataset.
    '''

    from clustergrammer import Network
    net = Network()
    net.load_file('processed_MNIST/large_files/MNIST_row_labels.txt')
    tmp_df = net.dat_to_df()
    df = tmp_df['mat']

    print(df.shape)

    label_dict = get_label_dict()

    num_sample = 30

    # only keep num_sample instances of each digit
    ###########################################
    keep_cols = []

    for inst_digit in label_dict:
        tmp_name = label_dict[inst_digit]

        # select num_sample instances of each digit
        for i in range(num_sample):
            inst_name = tmp_name + '-' + str(i)
            keep_cols.append(inst_name)

    # grab subset of numbers
    df = df[keep_cols]

    # add category labels (add_MNIST_cats presumably expects the DataFrame)
    df = add_MNIST_cats(df)

    print('shape after processing')
    print(df.shape)

    df.to_csv('processed_MNIST/MNIST_' + str(num_sample) + 'x_original.txt',
              sep='\t')
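
# get_label_dict is a local helper not shown here. A minimal sketch, assuming
# it maps digit values to the label names used in the column titles
# (e.g. 'zero-0', 'one-12'):
def get_label_dict():
    return {0: 'zero', 1: 'one', 2: 'two', 3: 'three', 4: 'four',
            5: 'five', 6: 'six', 7: 'seven', 8: 'eight', 9: 'nine'}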
def reproduce_Mark_correlation_matrix():
    import pandas as pd
    from scipy.spatial.distance import squareform
    from clustergrammer import Network
    from copy import deepcopy

    dist_vect = calc_custom_dist(data_type='ptm_none',
                                 dist_metric='correlation',
                                 pairwise='True')

    dist_mat = squareform(dist_vect)

    # make similarity matrix
    dist_mat = 1 - dist_mat

    net = Network()

    data_type = 'ptm_none'

    filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/' + \
               data_type + '.txt'

    # load file and export dataframe
    net = deepcopy(Network())
    net.load_file(filename)
    net.swap_nan_for_zero()
    tmp_df = net.dat_to_df()
    df = tmp_df['mat']

    cols = df.columns.tolist()
    rows = cols

    mark_df = pd.DataFrame(data=dist_mat, columns=cols, index=rows)

    save_filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/' \
               + 'Mark_corr_sim_mat' + '.txt'
    mark_df.to_csv(save_filename, sep='\t', na_rep='nan')
def reproduce_Mark_correlation_matrix():
  import pandas as pd
  from scipy.spatial.distance import squareform
  from clustergrammer import Network
  from copy import deepcopy

  dist_vect = calc_custom_dist(data_type='ptm_none', dist_metric='correlation',
                              pairwise='True')


  dist_mat = squareform(dist_vect)

  # make similarity matrix
  dist_mat = 1 - dist_mat

  net = Network()

  data_type = 'ptm_none'

  filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/' + \
             data_type + '.txt'

  # load file and export dataframe
  net = deepcopy(Network())
  net.load_file(filename)
  net.swap_nan_for_zero()
  tmp_df = net.dat_to_df()
  df = tmp_df['mat']

  cols = df.columns.tolist()
  rows = cols

  mark_df = pd.DataFrame(data=dist_mat, columns=cols, index=rows)

  save_filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/' \
             + 'Mark_corr_sim_mat' + '.txt'
  mark_df.to_csv(save_filename, sep='\t')
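
# calc_custom_dist is a local helper not shown here. A minimal sketch,
# assuming it loads the data-type matrix and returns a condensed pairwise
# distance vector over the columns (cell lines); the signature is copied from
# the calls above, the body is hypothetical:
from scipy.spatial.distance import pdist

def calc_custom_dist(data_type, dist_metric, pairwise):
  from clustergrammer import Network
  net = Network()
  filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/' + \
             data_type + '.txt'
  net.load_file(filename)
  net.swap_nan_for_zero()
  df = net.dat_to_df()['mat']
  # transpose so cell lines (columns) become rows for pdist
  return pdist(df.transpose().values, metric=dist_metric)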
import time
start_time = time.time()

from clustergrammer import Network
net = Network()

# choose tsv file
####################
inst_name = 'Tyrosine'
# net.load_file('txt/phos_ratios_all_treat_no_geld_ST.txt')
net.load_file('txt/phos_ratios_all_treat_no_geld_Tyrosine.txt')


net.swap_nan_for_zero()

# net.normalize(axis='row', norm_type='zscore', keep_orig=True)

print(net.dat.keys())

views = ['N_row_sum', 'N_row_var']

net.make_clust(dist_type='cos', views=views, dendro=True,
               sim_mat=True, filter_sim=0.1, calc_cat_pval=False)
               # run_enrichr=['KEA_2015'])
               # run_enrichr=['ENCODE_TF_ChIP-seq_2014'])
               # run_enrichr=['GO_Biological_Process_2015'])

net.write_json_to_file('viz', 'json/'+inst_name+'.json', 'no-indent')
net.write_json_to_file('sim_row', 'json/'+inst_name+'_sim_row.json', 'no-indent')
net.write_json_to_file('sim_col', 'json/'+inst_name+'_sim_col.json', 'no-indent')
Example No. 16
		# Format index/headers for clustergrammer
		gene_attribute_matrix.index = gene_attribute_matrix.index.map(lambda s: '%s: %s' % (gene_attribute_matrix.index.name, s))
		gene_attribute_matrix.columns = gene_attribute_matrix.columns.map(lambda s: '%s: %s' % (gene_attribute_matrix.columns.name, s))
		# Remove names for clustergrammer
		gene_attribute_matrix.index.name = ""
		gene_attribute_matrix.columns.name = ""
		# Write to file
		# fp = StringIO()
		# gene_attribute_matrix.to_csv(fp, sep='\t')
		gene_attribute_matrix.to_csv('tmp.txt', sep='\t')

		# Clustergrammer
		from clustergrammer import Network
		net = Network()
		# net.load_tsv_to_net(fp, name) # StringIO
		net.load_file('tmp.txt')
		net.swap_nan_for_zero()
		# Generate
		net.make_clust(dist_type='cos',views=['N_row_sum', 'N_row_var'], dendro=True,
					   sim_mat=True, filter_sim=0.1, calc_cat_pval=False)

		# Insert into database
		cur.execute('insert into `datasets` (`Name`, `prot_att`, `att_att`, `prot_prot`) values (?, ?, ?, ?)',
			(name,
			 net.export_net_json('viz', indent='no-indent'),
			 net.export_net_json('sim_col', indent='no-indent'),
			 net.export_net_json('sim_row', indent='no-indent')))
		con.commit()
	except Exception as e:
		print "Couldn't process %s (%s)" % (name, e)
		continue
Example No. 17
'''
Python 2.7
The clustergrammer python module can be installed using pip:
pip install clustergrammer

or by getting the code from the repo:
https://github.com/MaayanLab/clustergrammer-py
'''

from clustergrammer import Network
net = Network()

# load matrix tsv file
net.load_file('in.tsv')
# net.load_file('txt/ccle_example.txt')
# net.load_file('txt/rc_val_cats.txt')
# net.load_file('txt/number_labels.txt')
# net.load_file('txt/mnist.txt')
# net.load_file('txt/tuple_cats.txt')
# net.load_file('txt/example_tsv.txt')

# net.enrichrgram('KEA_2015')

# optional filtering and normalization
##########################################
# net.filter_sum('row', threshold=20)
# net.normalize(axis='col', norm_type='zscore', keep_orig=True)
# net.filter_N_top('row', 250, rank_type='sum')
# net.filter_threshold('row', threshold=3.0, num_occur=4)
# net.swap_nan_for_zero()
# net.set_cat_color('col', 1, 'Category: one', 'blue')
Example No. 18
# make network object and load file
from clustergrammer import Network

net = Network()
net.load_file('mult_view.tsv')

# Z-score normalize the rows
#net.normalize(axis='row', norm_type='zscore', keep_orig=True)

# calculate clustering using default parameters
net.cluster()

# save visualization JSON to file for use by front end
net.write_json_to_file('viz', 'mult_view.json')

#	needs pandas and sklearn as well
#	pip install --user --upgrade clustergrammer pandas sklearn
Example No. 19
import sys

import pandas as pd
from clustergrammer import Network

net = Network()
filename = sys.argv[1]
outname = sys.argv[2]
wd = sys.argv[3]
jobid = sys.argv[4]
use_user_label = sys.argv[5]

user_label = jobid + '_user_label_name.txt'
df = pd.read_csv(user_label, sep='\t', header=0)
unique_array = df.iloc[:, 0].unique()
#df['num_unique'] = df.nunique(axis=1)
#print(unique_array)
#print(df.iloc[0,:].unique())

net.load_file(filename)
color_array = [
    '#92896B', '#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231',
    '#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', '#008080',
    '#e6beff', '#9a6324', '#fffac8', '#800000', '#aaffc3', '#808000',
    '#ffd8b1', '#000075', '#808080', '#ff0000', '#000000'
]
color_array2 = [
    "#000000", "#FFFF00", "#1CE6FF", "#FF34FF", "#FF4A46", "#008941",
    "#006FA6", "#A30059", "#FFDBE5", "#7A4900", "#0000A6", "#63FFAC",
    "#B79762", "#004D43", "#8FB0FF", "#997D87", "#5A0007", "#809693",
    "#FEFFE6", "#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80",
    "#61615A", "#BA0900", "#6B7900", "#00C2A0", "#FFAA92", "#FF90C9",
    "#B903AA", "#D16100", "#DDEFFF", "#000035", "#7B4F4B", "#A1C299",
    "#300018", "#0AA6D8", "#013349", "#00846F", "#372101", "#FFB500",
    "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2", "#C2FF99", "#001E09",
Example No. 20
# make network object and load file
from clustergrammer import Network
net = Network()

b = "cluster.txt"
d = "cluster.json"

net.load_file(input_filename)

# calculate clustering using default parameters
net.cluster()

# save visualization JSON to file for use by front end
net.write_json_to_file('viz', output_filename)
import time
start_time = time.time()

from clustergrammer import Network
net = Network()

# choose tsv file
####################
inst_name = 'Tyrosine'
# net.load_file('txt/phos_ratios_all_treat_no_geld_ST.txt')
net.load_file('txt/phos_ratios_all_treat_no_geld_Tyrosine.txt')

net.swap_nan_for_zero()

# net.normalize(axis='row', norm_type='zscore', keep_orig=True)

print(net.dat.keys())

views = ['N_row_sum', 'N_row_var']

net.make_clust(dist_type='cos',
               views=views,
               dendro=True,
               sim_mat=True,
               filter_sim=0.1,
               calc_cat_pval=False)
# run_enrichr=['KEA_2015'])
# run_enrichr=['ENCODE_TF_ChIP-seq_2014'])
# run_enrichr=['GO_Biological_Process_2015'])

net.write_json_to_file('viz', 'json/' + inst_name + '.json', 'no-indent')
'''
Python 2.7
The clustergrammer python module can be installed using pip:
pip install clustergrammer

or by getting the code from the repo:
https://github.com/MaayanLab/clustergrammer-py
'''

from clustergrammer import Network
net = Network()

# load matrix tsv file
net.load_file('C:/Users/omkar/Desktop/clustergram.txt')
print("File Loaded!")
# net.load_file('txt/ccle_example.txt')
# net.load_file('txt/rc_val_cats.txt')
# net.load_file('txt/number_labels.txt')
# net.load_file('txt/mnist.txt')
# net.load_file('txt/tuple_cats.txt')
# net.load_file('txt/example_tsv.txt')

# net.enrichrgram('KEA_2015')

# optional filtering and normalization
##########################################
# net.filter_sum('row', threshold=20)
# net.normalize(axis='col', norm_type='zscore', keep_orig=True)
# net.filter_N_top('row', 250, rank_type='sum')
# net.filter_threshold('row', threshold=3.0, num_occur=4)
# net.swap_nan_for_zero()
Example No. 23
'''
Python 2.7
The clustergrammer python module can be installed using pip:
pip install clustergrammer

or by getting the code from the repo:
https://github.com/MaayanLab/clustergrammer-py
'''

from clustergrammer import Network
net = Network()

# load matrix tsv file
net.load_file('../data_mats/df_predict_merge.txt')

net.set_cat_color('row', 1, 'virus: chik', 'blue')
net.set_cat_color('row', 1, 'virus: zika', 'red')
net.cluster(enrichrgram=False)

# transfer colors from original to predicted categories
########################################################

# make category colors the same for Chik groups
for inst_cat in net.viz['cat_colors']['row']['cat-1']:
    new_cat = inst_cat.replace('original', 'predict')
    inst_color = net.viz['cat_colors']['row']['cat-1'][inst_cat]
    net.set_cat_color('row', 3, new_cat, inst_color)

net.cluster(enrichrgram=False)

# write jsons for front-end visualizations
Example No. 24
#from sys import argv
from clustergrammer import Network

net = Network()
net.load_file('mat.txt')
#argv[1]
# calculate clustering using default parameters
net.cluster()

# save visualization JSON to file for use by front end
net.write_json_to_file('viz', 'kbio_mhci_view.json')

net2 = Network()
net2.load_file('mat2.txt')
#argv[1]
# calculate clustering using default parameters
net2.cluster()

# save visualization JSON to file for use by front end
net2.write_json_to_file('viz', 'kbio_mhci_view_summary.json')
Example No. 25
import os, sys, re
from collections import defaultdict

# make network object and load file
from clustergrammer import Network

if __name__ == "__main__":

    matrix_filename = sys.argv[1]
    html_output_filename = sys.argv[2]

    print('loading file...')
    net = Network()
    # load matrix file
    net.load_file(matrix_filename)
    print('done')

    # cluster using default parameters
    print('clustering the matrix...')
    net.cluster(dist_type='jaccard', linkage_type='complete')
    #    net.cluster(run_clustering=False)
    print('done')

    # save visualization JSON to file for use by front end
    print('saving results in json file...')
    json_filename = matrix_filename + '.json'
    net.write_json_to_file('viz', json_filename)
    print('done')

    # creating the html page
Example No. 26
ids = delta_f.columns.map(lambda x: x.split('|')[0])
fout = open("%s_heatmap_matrix.txt" % args.d, 'w')
fout.write("\t\t%s\n" % ('\t'.join(tfs)))

cls = []
for i in ids:
    if ann_dict.get(i, ['NA'])[0] == 'NA':
        cls.append("Cell Line: %s" % ('NA'))
    else:
        cls.append("Cell Line: %s" % (ann_dict[i][0]))
fout.write("\t\t%s\n" % ('\t'.join(cls)))

ts = []
for i in ids:
    if ann_dict.get(i, ['NA', 'NA'])[1] == 'NA':
        ts.append("Tissue: %s" % ('NA'))
    else:
        ts.append("Tissue: %s" % (ann_dict[i][1]))
fout.write("\t\t%s\n" % ('\t'.join(ts)))

for i in range(status.shape[0]):
    fout.write('%s\t%s\t%s\n' %
               ("Gene: %s" % genes[i], "Input Gene: %s" % status[i], '\t'.join(
                   delta_f.iloc[i, :].map(str))))
fout.close()

net.load_file("%s_heatmap_matrix.txt" % args.d)
net.cluster()
net.write_json_to_file('viz', '%s_mult_view.json' % args.d)
Example No. 27
'''
Python 2.7
The clustergrammer python module can be installed using pip:
pip install clustergrammer

or by getting the code from the repo:
https://github.com/MaayanLab/clustergrammer-py
'''

from clustergrammer import Network

net = Network()

# load matrix tsv file
#net.load_file('txt/rc_two_cats.txt')

net.load_file('txt/papseek_data.txt')
#net.write_json_to_file('viz', 'json/pooja.json', 'indent')

# net.load_file('txt/ccle_example.txt')
# net.load_file('txt/rc_val_cats.txt')
# net.load_file('txt/number_labels.txt')
# net.load_file('txt/mnist.txt')
# net.load_file('txt/tuple_cats.txt')
# net.load_file('txt/example_tsv.txt')

# net.enrichrgram('KEA_2015')

# optional filtering and normalization
##########################################
# net.filter_sum('row', threshold=20)
# net.normalize(axis='col', norm_type='zscore', keep_orig=True)
Example No. 28
from clustergrammer import Network
import sys
filename = sys.argv[-1]
net = Network()
print("Python is fun.")
print(filename)
filepath = '/Users/snehalpatil/Documents/GithubProjects/gsesuite-data/heatmap/' + filename
print(filepath)
net.load_file(filepath)
net.cluster()

jsonname = filename.replace(".txt", ".json")

jsonfilepath = '/Users/snehalpatil/Documents/GithubProjects/gsesuite-data/heatmap/' + jsonname
net.write_json_to_file('viz', jsonfilepath)
# make network object and load file
from clustergrammer import Network
net = Network()
net.load_file('mult_view.tsv')

# Z-score normalize the rows
#net.normalize(axis='row', norm_type='zscore', keep_orig=True)

# calculate clustering using default parameters
net.cluster()

# save visualization JSON to file for use by front end
net.write_json_to_file('viz', 'mult_view.json')
#	needs pandas and sklearn as well
#	pip install --user --upgrade clustergrammer pandas sklearn
Example No. 30
        # Format index/headers for clustergrammer
        gene_attribute_matrix.index = gene_attribute_matrix.index.map(
            lambda s: '%s: %s' % (gene_attribute_matrix.index.name, s))
        gene_attribute_matrix.columns = gene_attribute_matrix.columns.map(
            lambda s: '%s: %s' % (gene_attribute_matrix.columns.name, s))
        # Remove names for clustergrammer
        gene_attribute_matrix.index.name = ""
        gene_attribute_matrix.columns.name = ""
        # Write to file
        # fp = StringIO()
        # gene_attribute_matrix.to_csv(fp, sep='\t')
        gene_attribute_matrix.to_csv('tmp.txt', sep='\t')

        # Clustergrammer
        from clustergrammer import Network
        net = Network()
        # net.load_tsv_to_net(fp, name) # StringIO
        net.load_file('tmp.txt')
        net.swap_nan_for_zero()
        # Generate
        net.make_clust(dist_type='cos',
                       views=['N_row_sum', 'N_row_var'],
                       dendro=True,
                       sim_mat=True,
                       filter_sim=0.1,
                       calc_cat_pval=False)

        # Insert into database
        cur.execute(
            'insert into `datasets` (`Name`, `prot_att`, `att_att`, `prot_prot`) values (?, ?, ?, ?)',
            (name, net.export_net_json('viz', indent='no-indent'),
             net.export_net_json('sim_col', indent='no-indent'),
             net.export_net_json('sim_row', indent='no-indent')))
def main():

    from clustergrammer import Network

    # load CCLE cell lines
    filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/ccle_cl_names.txt'
    with open(filename, 'r') as f:
        lines = f.readlines()

    cl_names = []
    for inst_line in lines:
        inst_line = inst_line.strip()
        cl_names.append(inst_line)

    filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/CCLE_lung.txt'

    net = Network()

    net.load_file(filename)

    ccle_lung = net.export_df()

    cols = ccle_lung.columns.tolist()

    # simplify cols, discard meta-data
    ######################################
    simple_cols = []

    for inst_col in cols:
        proc_col = inst_col[0].split(': ')[1].replace('NCI', '')

        if 'CALU' in proc_col:
            proc_col = proc_col.replace('CALU', 'Calu-')

        if 'LOU' in proc_col:
            proc_col = proc_col.replace('LOU', 'Lou-')

        if 'CAL' in proc_col:
            proc_col = proc_col.replace('CAL', 'CAL-')

        simple_cols.append(proc_col)

    ccle_lung.columns = simple_cols

    cols = ccle_lung.columns.tolist()

    found_cols = []

    for inst_col in cols:
        if inst_col in cl_names:
            found_cols.append(inst_col)

    # confirm that all cell lines were found
    print('found ' + str(len(found_cols)))

    # save subset of cell lines that are also found in the CST PTM data
    ccle_cst_lung = ccle_lung[cl_names]

    save_filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/CCLE_CST_lung.txt'
    ccle_cst_lung.to_csv(save_filename, sep='\t')
import time
# import StringIO

start_time = time.time()

# import network class from Network.py

from clustergrammer import Network
net = Network()

net.load_file('txt/rc_two_cats.txt')
# net.load_file('txt/example_tsv.txt')
# net.load_file('txt/col_categories.txt')
# net.load_file('txt/mat_cats.tsv')
# net.load_file('txt/mat_1mb.Txt')
# net.load_file('txt/mnist.txt')
# net.load_file('txt/sim_mat_4_cats.txt')

views = ['N_row_sum', 'N_row_var']

# filtering rows and cols by sum
# net.filter_sum('row', threshold=20)
# net.filter_sum('col', threshold=30)

# keep top rows based on sum
# net.filter_N_top('row', 10, 'sum')

net.make_clust(dist_type='cos', views=views, dendro=True,
               sim_mat=True, filter_sim=0.1)

# net.produce_view({'N_row_sum':10,'dist':'euclidean'})
Example No. 33
# make network object and load file
from clustergrammer import Network
net = Network()
net.load_file('txt/new_matrix.txt')
# net.add_cats("col",[
#   {
#     "title": "year",
#     "cats": {
#       "1995": [
#         "p2",
#         "p3"
#       ],
#       "1998":[
#           "p1",
#           "p4"
#       ]
#     }
#   },
#   {
#     "title": "s_author",
#     "cats": {
#       "aa": [
#         "p1",
#         "p3"
#       ],
#       "bb":[
#           "p1",
#           "p2",
#           "p3",
#           "p4"
#       ],
Example No. 34
'''
Python 2.7
The clustergrammer python module can be installed using pip:
pip install clustergrammer

or by getting the code from the repo:
https://github.com/MaayanLab/clustergrammer-py
'''

from clustergrammer import Network
net = Network()

# load matrix tsv file
net.load_file('txt/heatmap_features.txt')

net.set_cat_color('row', 1, 'Feature Type: Interactivity', 'yellow')
net.set_cat_color('row', 1, 'Feature Type: Sharing', 'blue')
net.set_cat_color('row', 1, 'Feature Type: Usability', 'orange')
net.set_cat_color('row', 1, 'Feature Type: Biology-Specific', 'red')

net.cluster(dist_type='cos',
            views=[],
            dendro=True,
            filter_sim=0.1,
            calc_cat_pval=False,
            enrichrgram=False)

# write jsons for front-end visualizations
net.write_json_to_file('viz', 'json/mult_view.json', 'indent')
Example No. 35
def make_ccle_matrix_subset():
    '''
    This will save a subset of the downsampled matrix using the proteins of interest.
    '''
    from clustergrammer import Network
    import json_scripts

    print('-- load CCLE downsampled data')

    # load downsampled CCLE data
    net = Network()
    net.load_file('CCLE/CCLE_kmeans_ds_col_100.txt')

    df = net.export_df()

    # load proteins of interest
    filename = 'proteins_of_interest/proteins_of_interest.json'
    poi = json_scripts.load_to_dict(filename)

    all_poi = []
    for inst_type in poi:
        all_poi.extend(poi[inst_type])

    # only keep pois that are found in the CCLE
    all_genes = df.index.tolist()

    found_poi = list(set(all_genes) & set(all_poi))

    num_found_poi = len(found_poi)

    print(
        str(num_found_poi) +
        ' proteins of interest were found in the CCLE data')

    # filter dataframe using row list (transpose and transpose-back)
    ##################################################################
    df = df.transpose()
    df = df[found_poi]
    df = df.transpose()

    # save version without protein categories (e.g. kinase)
    df.to_csv('CCLE/CCLE_kmeans_ds_col_100_poi_no_cats.txt', sep='\t')

    row_cats = []

    for inst_gene in found_poi:

        # add protein type to gene names
        found_type = ''
        for inst_type in poi:

            if inst_gene in poi[inst_type]:
                found_type = inst_type

        gene_name = 'gene: ' + inst_gene
        cat_name = 'type: ' + found_type
        inst_tuple = (gene_name, cat_name)

        row_cats.append(inst_tuple)

    # redefine index
    df.index = row_cats

    print('-- save matrix with proteins_of_interest subset')
    df.to_csv('CCLE/CCLE_kmeans_ds_col_100_poi.txt', sep='\t')
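
# json_scripts.load_to_dict is a local helper; a minimal sketch, assuming it
# simply wraps json.load:
import json

def load_to_dict(filename):
    with open(filename, 'r') as f:
        return json.load(f)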
Example No. 36
import time
start_time = time.time()

from clustergrammer import Network
net = Network()

net.load_file('txt/rc_two_cats.txt')
# net.load_file('txt/tmp.txt')

views = ['N_row_sum', 'N_row_var']

net.make_clust(dist_type='cos', views=views, dendro=True, sim_mat=True)

net.write_json_to_file('viz', 'json/mult_view.json')
net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json')
net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json')

elapsed_time = time.time() - start_time

print('\n\nelapsed time')
print(elapsed_time)
Example No. 37
'''
The clustergrammer python module can be installed using pip:
pip install clustergrammer

or by getting the code from the repo:
https://github.com/MaayanLab/clustergrammer-py
'''
import os
from clustergrammer import Network

for filename in os.listdir("tsv"):
    name = filename.split(".")[0]
    net = Network()
    # load matrix tsv file
    print(name)
    net.load_file('tsv/' + name + '.tsv')

    # optional filtering and normalization
    ##########################################
    net.swap_nan_for_zero()

    net.make_clust(dist_type='cos',
                   views=['N_row_sum', 'N_row_var'],
                   dendro=True,
                   sim_mat=True,
                   filter_sim=0.1,
                   calc_cat_pval=False)

    # write jsons for front-end visualizations
    net.write_json_to_file('viz', 'output/' + name + '.json', 'indent')