def exp_heatmap_json(request):
    # Exp (the model) and JsonResponse are assumed to be imported at module level
    import json

    import pandas as pd
    from clustergrammer import Network

    # pull the expression table out of the secondary database
    columns = [i.name for i in Exp._meta.get_fields()]
    exps = Exp.objects.all().using("expDb").values()
    df = pd.DataFrame(list(exps), columns=columns)

    # index on gene_id and drop it from the data columns
    df.index = df.gene_id
    df = df.loc[:, df.columns[1:]]

    net = Network()
    net.load_df(df)

    # Z-score normalize the rows
    net.normalize(axis='row', norm_type='zscore', keep_orig=True)

    # filter for the top 100 columns based on their absolute value sum
    net.filter_N_top('col', 100, 'sum')

    # cluster using default parameters
    net.cluster()

    # export the visualization JSON for use by the front end
    data = json.loads(net.export_net_json('viz'))

    response = {
        'data': data,
    }
    return JsonResponse(response, content_type='application/json')
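# A hedged sketch of how the view above might be routed; the module layout,
# URL path, and route name are assumptions for illustration, not taken from
# the source project.
# urls.py
from django.urls import path

from . import views

urlpatterns = [
    path('exp_heatmap_json/', views.exp_heatmap_json, name='exp_heatmap_json'),
]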
def make_viz_from_df(df, filename):
    from clustergrammer import Network

    net = Network()
    net.df_to_dat(df)
    net.swap_nan_for_zero()

    # Z-score the columns first so their distributions are comparable
    net.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # filter the rows to keep the perts with the largest normalized values
    net.filter_N_top('row', 2000)

    num_columns = net.dat['mat'].shape[1]
    if num_columns < 50:
        views = ['N_row_sum']
        net.make_clust(dist_type='cos', views=views)

        filename = 'json/' + filename.split('/')[1].replace('.gct', '') + '.json'
        net.write_json_to_file('viz', filename)
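# A minimal usage sketch for make_viz_from_df; the DataFrame values and the
# GCT-style input path are made up for illustration.
import pandas as pd

# hypothetical matrix: rows are perturbations, columns are samples
df = pd.DataFrame(
    {'sample_1': [1.2, -0.4, 0.7], 'sample_2': [0.3, 2.1, -1.5]},
    index=['pert_A', 'pert_B', 'pert_C'])

# writes json/LDS-1003.json: the output name is keyed off the second path
# segment, so the input path must contain a '/'
make_viz_from_df(df, 'gcts/LDS-1003.gct')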
def make_phos_homepage_viz():
    from clustergrammer import Network

    net = Network()
    filename = 'lung_cellline_3_1_16/lung_cellline_phospho/' + \
               'lung_cellline_TMT_phospho_combined_ratios.tsv'
    net.load_file(filename)

    # quantile normalize to normalize cell lines
    net.normalize(axis='col', norm_type='qn')

    # only keep the most differentially regulated PTMs
    net.filter_N_top('row', 250, 'sum')

    # take the Z-score of the rows
    net.normalize(axis='row', norm_type='zscore', keep_orig=True)
    net.swap_nan_for_zero()

    # threshold-filter the PTMs
    net.filter_threshold('row', threshold=1.75, num_occur=3)

    views = ['N_row_sum', 'N_row_var']
    net.make_clust(dist_type='cos', views=views, dendro=True,
                   sim_mat=True, calc_cat_pval=True)

    net.write_json_to_file('viz', 'json/homepage_phos.json', 'indent')
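# The threshold filter keeps only strongly regulated PTMs. A rough pandas
# equivalent of the selection that filter_threshold('row', threshold=1.75,
# num_occur=3) performs -- a paraphrase of the documented behavior, not
# clustergrammer's internal code:
import pandas as pd

def rows_passing_threshold(df, threshold=1.75, num_occur=3):
    # keep rows where at least num_occur entries exceed the threshold
    # in absolute value
    mask = (df.abs() > threshold).sum(axis=1) >= num_occur
    return df[mask]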
def process_GCT_and_export_tsv():
    from clustergrammer import Network

    filename = 'gcts/LDS-1003.gct'
    print('exporting processed GCT as tsv file')

    # load_file is a GCT-parsing helper assumed to be defined elsewhere in
    # this script; it returns the GCT matrix as a DataFrame
    df = load_file(filename)

    net = Network()
    net.df_to_dat(df)
    net.swap_nan_for_zero()

    # Z-score the columns first so their distributions are comparable
    net.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # filter the rows to keep the perts with the largest normalized values
    net.filter_N_top('row', 200)

    net.write_matrix_to_tsv('txt/example_gct_export.txt')
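# A minimal sketch of one way the load_file helper could parse a GCT v1.2
# file into a DataFrame (a version line, a dimensions line, then a header
# with name/description columns before the data); the exact column labels
# vary by producer, so treat this as an assumption:
import pandas as pd

def load_file(filename):
    # skip the '#1.2' version line and the 'rows cols' dimensions line,
    # index on the first (name) column, and drop the description column
    df = pd.read_csv(filename, sep='\t', skiprows=2, index_col=0)
    return df.drop(columns=df.columns[0])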
def make_json_from_tsv(name):
    '''
    Make a clustergrammer JSON from a TSV file.
    '''
    from clustergrammer import Network

    print('\n' + name)

    net = Network()
    filename = 'txt/' + name + '.txt'
    net.load_file(filename)
    df = net.dat_to_df()

    net.swap_nan_for_zero()

    # Z-score the columns first so their distributions are comparable
    net.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # filter the rows to keep the perts with the largest normalized values
    net.filter_N_top('row', 1000)

    num_rows = net.dat['mat'].shape[0]
    num_cols = net.dat['mat'].shape[1]
    print('num_rows ' + str(num_rows))
    print('num_cols ' + str(num_cols))

    if num_cols < 50 or num_rows < 1000:
        views = ['N_row_sum']
        net.make_clust(dist_type='cos', views=views)

        export_filename = 'json/' + name + '.json'
        net.write_json_to_file('viz', export_filename)
    else:
        print('did not cluster; matrix too large (>= 50 columns and >= 1000 rows)')
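# Usage takes just the base name; the 'txt/' prefix and '.txt' extension are
# added inside the function. The file name below is hypothetical.
# expects txt/rc_two_cats.txt to exist; writes json/rc_two_cats.json
make_json_from_tsv('rc_two_cats')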
# make network object and load DataFrame, df
import sys

import pandas as pd
from clustergrammer import Network

# header=0 (not header=True) tells pandas the first row holds column labels
df = pd.read_csv(sys.argv[1], header=0, index_col=0, sep='\t')

net = Network()
net.load_df(df)

# Z-score normalize the rows
net.normalize(axis='row', norm_type='zscore', keep_orig=True)

# filter for the top 100 columns based on their absolute value sum
net.filter_N_top('col', 100, 'sum')

# cluster using default parameters
net.cluster()

# save visualization JSON to file for use by front end
net.write_json_to_file('viz', sys.argv[2])
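# Run from the command line with an input TSV and an output JSON path;
# the script and file names below are hypothetical:
#
#     python make_viz_json.py txt/rc_two_cats.txt json/mult_view.json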