Example #1
0
def exp_heatmap_json(request):
    """Return clustergrammer visualization JSON for the Exp table.

    Loads every row of ``Exp`` from the ``expDb`` database, z-score
    normalizes each gene row, keeps the top 100 columns by absolute-value
    sum, clusters with default parameters, and returns the 'viz' JSON
    wrapped in a Django ``JsonResponse``.
    """
    import pandas as pd
    from clustergrammer import Network

    columns = [field.name for field in Exp._meta.get_fields()]
    exps = Exp.objects.all().using("expDb").values()
    df = pd.DataFrame(list(exps), columns=columns)

    # Index rows by gene id and drop the gene_id column from the matrix.
    # (Previously this sliced columns[1:], which silently assumed gene_id
    # was the first model field; set_index makes the intent explicit and
    # is correct regardless of field order.)
    df = df.set_index('gene_id')

    net = Network()
    net.load_df(df)

    # Z-score normalize the rows
    net.normalize(axis='row', norm_type='zscore', keep_orig=True)

    # filter for the top 100 columns based on their absolute value sum
    net.filter_N_top('col', 100, 'sum')

    # cluster using default parameters
    net.cluster()

    # export visualization JSON for use by the front end
    data = json.loads(net.export_net_json('viz'))
    response = {
        'data': data,
    }
    return JsonResponse(response, content_type='application/json')
Example #2
0
def make_viz_from_df(df, filename):
    """Build a clustergrammer 'viz' JSON from *df* and write it under json/.

    Parameters
    ----------
    df : pandas.DataFrame
        Matrix to visualize (rows are perturbations, columns are samples).
    filename : str
        Source path of the form ``dir/name.gct``; the output is written to
        ``json/name.json``.

    Only produces output when the filtered matrix has fewer than 50
    columns; wider matrices are skipped silently.
    """
    from clustergrammer import Network

    net = Network()

    net.df_to_dat(df)
    net.swap_nan_for_zero()

    # z-score the columns first so their distributions are comparable
    net.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # keep the 2000 rows (perts) with the largest normalized values
    net.filter_N_top('row', 2000)

    # fixed typo: was `num_coluns`
    num_columns = net.dat['mat'].shape[1]

    if num_columns < 50:
        # views = ['N_row_sum', 'N_row_var']
        views = ['N_row_sum']
        net.make_clust(dist_type='cos', views=views)

        # 'dir/name.gct' -> 'json/name.json'
        filename = 'json/' + filename.split('/')[1].replace('.gct',
                                                            '') + '.json'

        net.write_json_to_file('viz', filename)
Example #3
0
def make_phos_homepage_viz():
    """Generate the homepage phospho-proteomics clustergram JSON.

    Reads the combined TMT phospho ratio tsv, quantile-normalizes the
    cell-line columns, keeps the 250 most differentially regulated PTMs,
    z-scores the rows, threshold-filters, clusters, and writes the
    indented 'viz' JSON to json/homepage_phos.json.
    """
    from clustergrammer import Network

    net = Network()

    data_dir = 'lung_cellline_3_1_16/lung_cellline_phospho/'
    net.load_file(data_dir + 'lung_cellline_TMT_phospho_combined_ratios.tsv')

    # quantile normalization puts the cell-line columns on a common scale
    net.normalize(axis='col', norm_type='qn')

    # retain only the most differentially regulated PTMs
    net.filter_N_top('row', 250, 'sum')

    # z-score each PTM row
    net.normalize(axis='row', norm_type='zscore', keep_orig=True)

    net.swap_nan_for_zero()

    # drop PTMs that do not pass the threshold often enough
    net.filter_threshold('row', threshold=1.75, num_occur=3)

    net.make_clust(dist_type='cos',
                   views=['N_row_sum', 'N_row_var'],
                   dendro=True,
                   sim_mat=True,
                   calc_cat_pval=True)

    net.write_json_to_file('viz', 'json/homepage_phos.json', 'indent')
Example #4
0
def process_GCT_and_export_tsv():
    """Load the example GCT file, normalize/filter it, and export a tsv matrix."""
    from clustergrammer import Network

    gct_path = 'gcts/LDS-1003.gct'
    print('exporting processed GCT as tsv file')

    net = Network()
    net.df_to_dat(load_file(gct_path))
    net.swap_nan_for_zero()

    # z-score the columns first so their distributions are comparable
    net.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # keep the 200 rows (perts) with the largest normalized values
    net.filter_N_top('row', 200)

    net.write_matrix_to_tsv('txt/example_gct_export.txt')
Example #5
0
def process_GCT_and_export_tsv():
    """Process an example GCT matrix and write it out as a tsv file.

    Pipeline: load -> zero-fill NaNs -> column z-score -> keep top 200
    rows -> export to txt/example_gct_export.txt.
    """
    from clustergrammer import Network

    print('exporting processed GCT as tsv file')

    source = 'gcts/LDS-1003.gct'
    matrix = load_file(source)

    net = Network()
    net.df_to_dat(matrix)
    net.swap_nan_for_zero()

    # normalize columns so their value distributions match
    net.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # retain only the 200 strongest rows after normalization
    net.filter_N_top('row', 200)

    net.write_matrix_to_tsv('txt/example_gct_export.txt')
def make_json_from_tsv(name):
    """Make a clustergrammer json from a tsv file.

    Reads ``txt/<name>.txt``, normalizes and filters it, and — when the
    matrix is small enough to cluster — writes ``json/<name>.json``.
    """
    from clustergrammer import Network

    print('\n' + name)

    net = Network()
    net.load_file('txt/' + name + '.txt')

    # NOTE(review): this DataFrame is never used afterwards; kept for
    # behavioral parity with the original example.
    df = net.dat_to_df()

    net.swap_nan_for_zero()

    # z-score the columns first so their distributions are comparable
    net.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # keep the 1000 rows with the largest normalized values
    net.filter_N_top('row', 1000)

    num_rows, num_cols = net.dat['mat'].shape

    print('num_rows ' + str(num_rows))
    print('num_cols ' + str(num_cols))

    if num_cols >= 50 and num_rows >= 1000:
        print('did not cluster, too many columns ')
        return

    net.make_clust(dist_type='cos', views=['N_row_sum'])
    net.write_json_to_file('viz', 'json/' + name + '.json')
Example #7
0
def make_viz_from_df(df, filename):
  """Build a clustergrammer 'viz' JSON from *df* and write it under json/.

  *filename* is expected to look like ``dir/name.gct``; the output goes to
  ``json/name.json``.  Matrices with 50 or more columns (after filtering)
  are skipped without output.
  """
  from clustergrammer import Network

  net = Network()

  net.df_to_dat(df)
  net.swap_nan_for_zero()

  # z-score the columns first so their distributions are comparable
  net.normalize(axis='col', norm_type='zscore', keep_orig=True)

  # keep the 2000 rows (perts) with the largest normalized values
  net.filter_N_top('row', 2000)

  # fixed typo: was `num_coluns`
  num_columns = net.dat['mat'].shape[1]

  if num_columns < 50:
    # views = ['N_row_sum', 'N_row_var']
    views = ['N_row_sum']
    net.make_clust(dist_type='cos', views=views)

    # 'dir/name.gct' -> 'json/name.json'
    filename = 'json/' + filename.split('/')[1].replace('.gct','') + '.json'

    net.write_json_to_file('viz', filename)
Example #8
0
# Command-line script: read a tsv matrix (argv[1]), cluster it with
# clustergrammer, and write the visualization JSON to argv[2].
import sys
import pandas as pd
from clustergrammer import Network

# BUG FIX: pandas' read_csv `header` parameter takes an int / list /
# 'infer' / None — header=True is invalid and raises in modern pandas.
# header=0 means "first row is the column header", which is what was meant.
df = pd.read_csv(sys.argv[1], header=0, index_col=0, sep='\t')
net = Network()
net.load_df(df)

# Z-score normalize the rows
net.normalize(axis='row', norm_type='zscore', keep_orig=True)

# filter for the top 100 columns based on their absolute value sum
net.filter_N_top('col', 100, 'sum')

# cluster using default parameters
net.cluster()

# save visualization JSON to file for use by front end
net.write_json_to_file('viz', sys.argv[2])