Exemplo n.º 1
0
def get_clusters(analysis, data_types):
    """Cluster the standardized data of each requested data type.

    For every supported entry of ``data_types`` the latest analysis data is
    loaded, standardized through ``WebOmicsInference`` and clustered with a
    clustergrammer ``Network``.

    :param analysis: analysis object handed to ``get_last_analysis_data``.
    :param data_types: iterable of data type constants (``GENOMICS``,
        ``PROTEOMICS`` or ``METABOLOMICS``).
    :return: dict mapping ``'gene'`` / ``'protein'`` / ``'compound'`` to the
        value returned by ``Network.export_net_json()``. Data types whose
        standardized frame is empty, and unsupported data types, are omitted.
    """
    cluster_json = {}
    for data_type in data_types:
        # Resolve the per-type settings first. Previously an unsupported
        # data type fell through both branches and either hit an unbound
        # ``df`` or silently re-clustered the previous iteration's frame.
        if data_type == GENOMICS:
            label = 'gene'
            inference_kwargs = {}
            standardize_kwargs = {}
        elif data_type == PROTEOMICS or data_type == METABOLOMICS:
            label = 'protein' if data_type == PROTEOMICS else 'compound'
            inference_kwargs = {'min_value': 5000}
            standardize_kwargs = {'log': True}
        else:
            continue

        analysis_data = get_last_analysis_data(analysis, data_type)
        data_df, design_df = get_dataframes(analysis_data, PKS[data_type],
                                            SAMPLE_COL)
        inference = WebOmicsInference(data_df, design_df, data_type,
                                      **inference_kwargs)
        df = inference.standardize_df(inference.data_df, **standardize_kwargs)
        if df.empty:
            continue

        net = Network()
        net.load_df(df)
        net.cluster()
        cluster_json[label] = net.export_net_json()
    return cluster_json
Exemplo n.º 2
0
def exp_heatmap_json(request):
    """Return the clustered expression heatmap as a JSON response.

    Reads every ``Exp`` row from the ``expDb`` database, indexes the table by
    ``gene_id``, z-score normalizes the rows, keeps the top 100 columns and
    clusters the result. The visualization JSON is returned under ``'data'``.
    """
    import pandas as pd
    from clustergrammer import Network

    field_names = [field.name for field in Exp._meta.get_fields()]
    records = list(Exp.objects.all().using("expDb").values())

    frame = pd.DataFrame(records, columns=field_names)
    frame.index = frame.gene_id
    # Drop the first column; the remaining ones form the heatmap matrix.
    frame = frame.loc[:, frame.columns[1:]]

    network = Network()
    network.load_df(frame)
    # Row-wise z-score normalization.
    network.normalize(axis='row', norm_type='zscore', keep_orig=True)
    # Keep the 100 top-ranked columns (rank type 'sum').
    network.filter_N_top('col', 100, 'sum')
    # Cluster with the library defaults.
    network.cluster()

    # export_net_json returns a JSON string; decode it so the response embeds
    # the structure itself rather than a doubly-encoded string.
    payload = json.loads(network.export_net_json('viz'))
    return JsonResponse({'data': payload}, content_type='application/json')
Exemplo n.º 3
0
def prepare_heatmap(matrix_input, html_file, html_dir, tools_dir, categories, distance, linkage):
    """Render the clustergrammer HTML page and write its clustered JSON.

    Creates ``html_dir``, renders ``clustergrammer.template`` from
    ``<tools_dir>/templates`` into ``html_file``, then clusters
    ``matrix_input`` and writes the result to ``<html_dir>/mult_view.json``.

    ``categories`` is indexed with ``'row'`` and ``'col'``; a truthy value is
    passed to ``Network.add_cats`` for that axis. ``distance`` and ``linkage``
    are forwarded as ``dist_type`` and ``linkage_type``.
    """
    # Output directory and static HTML page.
    os.mkdir(html_dir)
    template_env = Environment(loader=FileSystemLoader(tools_dir + "/templates"))
    page = template_env.get_template("clustergrammer.template").render()
    with open(html_file, "w") as handle:
        handle.write(page)

    json_output = html_dir + "/mult_view.json"

    # Load, annotate and cluster the matrix.
    network = Network()
    network.load_file(matrix_input)
    for axis in ('row', 'col'):
        axis_cats = categories[axis]
        if axis_cats:
            network.add_cats(axis, axis_cats)
    network.cluster(dist_type=distance, linkage_type=linkage)
    network.write_json_to_file('viz', json_output)
Exemplo n.º 4
0
    def get_clustergrammer_json(self, outfile):
        """Cluster ``self.expression_dataframe`` and write the viz JSON.

        Column categories from ``self.sample_cats`` are attached when
        possible. If clustering or writing fails, an empty placeholder is
        left at ``outfile`` instead (best effort), so the method does not
        raise for those failures.

        :param outfile: path of the JSON file to write.
        """
        # Create network and load the expression matrix
        net = Network()
        net.load_df(self.expression_dataframe)

        # Categories are optional: carry on without them if they cannot be
        # attached. ``Exception`` (not a bare except) keeps
        # KeyboardInterrupt / SystemExit propagating.
        try:
            net.add_cats('col', self.sample_cats)
        except Exception:
            pass

        try:
            # calculate clustering using default parameters
            net.cluster()

            # save visualization JSON to file for use by front end
            net.write_json_to_file('viz', outfile)
        except Exception:
            # Equivalent of ``touch outfile`` without going through a shell,
            # which broke on paths with spaces or shell metacharacters.
            try:
                with open(outfile, 'a'):
                    pass
                os.utime(outfile, None)
            except (IOError, OSError):
                # The shell ``touch`` also failed silently in this case.
                pass
    def prepare_clustergrammer_data(self,
                                    outfname='clustergrammer_data.json',
                                    G=None):
        """for a distance matrix, output a clustergrammer JSON file
        that clustergrammer-js can use

        The graph's adjacency matrix is loaded into a clustergrammer network
        and clustered with ``dist_type='precalculated'``.

        for now it loads the clustergrammer-py module from local dev files
        TODO: once changes are pulled into clustergrammer-py, we can use the actual module (pip)

        :outfname: filename for the output json
        :G: networkx graph (defaults to self.G_sym, or self.G when
            self.G_sym is falsy)

        """
        # Honour an explicitly passed graph; it used to be overwritten
        # unconditionally, which made the parameter a no-op.
        if G is None:
            G = self.G_sym or self.G
        clustergrammer_py_dev_dir = '../clustergrammer/clustergrammer-py/'
        # Avoid growing sys.path by one entry on every call.
        if clustergrammer_py_dev_dir not in sys.path:
            sys.path.insert(0, clustergrammer_py_dev_dir)
        from clustergrammer import Network as ClustergrammerNetwork
        start = timer()
        # NOTE(review): nx.to_numpy_matrix was removed in NetworkX 3.0; switch
        # to nx.to_numpy_array when the project upgrades.
        d = nx.to_numpy_matrix(G)
        node_labels = list(G.nodes())
        df = pd.DataFrame(d, index=node_labels, columns=node_labels)
        net = ClustergrammerNetwork()
        net.load_df(df)
        net.cluster(dist_type='precalculated')
        logger.debug("done loading and clustering. took {}".format(
            format_timespan(timer() - start)))

        logger.debug("writing to {}".format(outfname))
        start = timer()
        net.write_json_to_file('viz', outfname)
        logger.debug("done writing file {}. took {}".format(
            outfname, format_timespan(timer() - start)))
# Minimal clustergrammer pipeline: load a matrix file, cluster it and write
# the visualization JSON.

# make network object and load file
from clustergrammer import Network
net = Network()
net.load_file('mult_view.tsv')

# Optional step (disabled): Z-score normalize the rows
#net.normalize(axis='row', norm_type='zscore', keep_orig=True)

# calculate clustering using default parameters
net.cluster()

# save visualization JSON to file for use by front end
net.write_json_to_file('viz', 'mult_view.json')



#	needs pandas and sklearn as well
#	pip install --user --upgrade clustergrammer pandas sklearn
Exemplo n.º 7
0
'''
Python 2.7
The clustergrammer python module can be installed using pip:
pip install clustergrammer

or by getting the code from the repo:
https://github.com/MaayanLab/clustergrammer-py
'''

from clustergrammer import Network
net = Network()

# load matrix tsv file
net.load_file('txt/heatmap_features.txt')

# Assign an explicit color to each 'Feature Type' value of row category 1
# (arguments: axis, category index, category name, color).
net.set_cat_color('row', 1, 'Feature Type: Interactivity', 'yellow')
net.set_cat_color('row', 1, 'Feature Type: Sharing', 'blue')
net.set_cat_color('row', 1, 'Feature Type: Usability', 'orange')
net.set_cat_color('row', 1, 'Feature Type: Biology-Specific', 'red')

# Cluster with the 'cos' distance type. An empty views list is passed, and
# category p-value calculation and enrichrgram are switched off.
net.cluster(dist_type='cos',
            views=[],
            dendro=True,
            filter_sim=0.1,
            calc_cat_pval=False,
            enrichrgram=False)

# write jsons for front-end visualizations
net.write_json_to_file('viz', 'json/mult_view.json', 'indent')
Exemplo n.º 8
0
# for inst_col in df.columns.tolist():
#   inst_val = inst_val + 1
#   new_col = (inst_col, 'Cat: C-' + str(inst_val), 'Val: ' + str(inst_val))
#   new_cols.append(new_col)

# new_cols = [(x, 'Cat-1: A', 'Cat-2: B') for x in df.columns]
# new_cols = [(x, 'Cat-1: A', 'Cat-2: B', 'Cat-3: C') for x in df.columns]
# Replace the DataFrame's row and column labels. new_rows / new_cols are built
# earlier in the script, outside this excerpt.
# NOTE(review): the commented-out code above suggests the labels are tuples
# that carry category strings - confirm against the full script.
df.index = new_rows
df.columns = new_cols

net.load_df(df)

# Cluster with the 'cos' distance type and request the 'N_row_sum' and
# 'N_row_var' views; similarity matrices and category p-values are disabled.
net.cluster(dist_type='cos',
            views=['N_row_sum', 'N_row_var'],
            dendro=True,
            sim_mat=False,
            filter_sim=0.1,
            calc_cat_pval=False,
            enrichrgram=True)

# write jsons for front-end visualizations
net.write_json_to_file('viz', 'data/big_data/custom.json', 'no-indent')

# net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent')
# net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json', 'no-indent')

# net.normalize(axis='row', norm_type='zscore')

net.cluster(dist_type='cos',
            views=['N_row_sum', 'N_row_var'],
            dendro=True,
Exemplo n.º 9
0
# Sample ids: the part of each delta_f column name before the first '|'.
ids = delta_f.columns.map(lambda x: x.split('|')[0])

# Write the annotated matrix file. The context manager guarantees the handle
# is closed even if one of the writes raises.
with open("%s_heatmap_matrix.txt" % args.d, 'w') as fout:
    # Header row built from tfs.
    fout.write("\t\t%s\n" % ('\t'.join(tfs)))

    # Category row 1: first annotation field per id; ids missing from
    # ann_dict get 'NA'.
    cls = ["Cell Line: %s" % ann_dict.get(i, ['NA'])[0] for i in ids]
    fout.write("\t\t%s\n" % ('\t'.join(cls)))

    # Category row 2: second annotation field per id, again defaulting to 'NA'.
    ts = ["Tissue: %s" % ann_dict.get(i, ['NA', 'NA'])[1] for i in ids]
    fout.write("\t\t%s\n" % ('\t'.join(ts)))

    # One data row per entry of status: gene label, input-gene flag, then the
    # values of the matching delta_f row.
    for i in range(status.shape[0]):
        fout.write('%s\t%s\t%s\n' %
                   ("Gene: %s" % genes[i], "Input Gene: %s" % status[i],
                    '\t'.join(delta_f.iloc[i, :].map(str))))

# Cluster the file that was just written and export the visualization JSON.
net.load_file("%s_heatmap_matrix.txt" % args.d)
net.cluster()
net.write_json_to_file('viz', '%s_mult_view.json' % args.d)
Exemplo n.º 10
0
    "#CC0744", "#C0B9B2", "#C2FF99", "#001E09", "#00489C", "#6F0062",
    "#0CBD66", "#EEC3FF", "#456D75", "#B77B68", "#7A87A1", "#788D66",
    "#885578", "#0089A3", "#FF8A9A", "#D157A0", "#BEC459", "#456648",
    "#0086ED", "#886F4C", "#34362D", "#B4A8BD", "#00A6AA", "#452C2C",
    "#636375", "#A3C8C9", "#FF913F", "#938A81", "#575329", "#00FECF",
    "#B05B6F", "#8CD0FF", "#3B9700", "#04F757", "#C8A1A1", "#1E6E00",
    "#7900D7", "#A77500", "#6367A9", "#A05837", "#6B002C", "#772600",
    "#D790FF", "#9B9700", "#549E79", "#FFF69F", "#201625", "#CB7E98",
    "#72418F", "#BC23FF", "#99ADC0", "#3A2465", "#922329", "#5B4534",
    "#FDE8DC", "#404E55", "#FAD09F", "#A4E804", "#f58231", "#324E72", "#402334"
]
# Give every 'SC3 label' value of column category 1 its own palette color;
# the label number is the color's position in color_array3.
for i, inst_color in enumerate(color_array3):
    label = 'SC3 label: _' + str(i) + '_'
    net.set_cat_color('col', 1, label, inst_color)

# When user labels are enabled, color column category 2 by walking the
# palette backwards from index 63.
if use_user_label == '1':
    for j in range(len(unique_array)):
        userlabel = 'User\'s label: _' + str(unique_array[j]) + '_'
        net.set_cat_color('col', 2, userlabel, color_array3[63 - j])

# Build the visualization with clustering itself switched off.
net.cluster(dist_type='cos', enrichrgram=True, run_clustering=False)

# write jsons for front-end visualizations
out = wd + 'json/' + outname + '.json'
net.write_json_to_file('viz', out, 'indent')
# Input matrix. The alternative example inputs are kept for quick switching.
net.load_file('txt/rc_two_cats.txt')
# net.load_file('txt/ccle_example.txt')
# net.load_file('txt/rc_val_cats.txt')
# net.load_file('txt/number_labels.txt')
# net.load_file('txt/mnist.txt')
# net.load_file('txt/tuple_cats.txt')
# net.load_file('txt/example_tsv.txt')

# net.enrichrgram('KEA_2015')

# Optional filtering and normalization (all disabled)
# net.filter_sum('row', threshold=20)
# net.normalize(axis='col', norm_type='zscore', keep_orig=True)
# net.filter_N_top('row', 250, rank_type='sum')
# net.filter_threshold('row', threshold=3.0, num_occur=4)
# net.swap_nan_for_zero()
# net.set_cat_color('col', 1, 'Category: one', 'blue')

# net.make_clust()
# net.dendro_cats('row', 5)

# Cluster with the 'cos' distance type, the two row-filter views and
# similarity matrices enabled.
net.cluster(
    dist_type='cos',
    views=['N_row_sum', 'N_row_var'],
    dendro=True,
    sim_mat=True,
    filter_sim=0.1,
    calc_cat_pval=False,
    enrichrgram=False,
    run_clustering=True
)

# Write the main view (indented) and both similarity views (compact).
net.write_json_to_file('viz', 'json/mult_view.json', 'indent')
net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent')
net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json', 'no-indent')
Exemplo n.º 12
0
from clustergrammer import Network

# Command-line entry point: argv[1] is the matrix file to cluster and argv[2]
# the HTML output file name.
# NOTE(review): this excerpt uses `sys` but no `import sys` is visible here;
# confirm it is imported at the top of the script.
if __name__ == "__main__":

    matrix_filename = sys.argv[1]
    html_output_filename = sys.argv[2]

    print('loading file...')
    net = Network()
    # load matrix file
    net.load_file(matrix_filename)
    print('done')

    # cluster with the 'jaccard' distance type and 'complete' linkage
    print('clustering the matrix...')
    net.cluster(dist_type='jaccard', linkage_type='complete')
    #    net.cluster(run_clustering=False)
    print('done')

    # save visualization JSON next to the input, as <matrix_filename>.json
    print('saving results in json file...')
    json_filename = matrix_filename + '.json'
    net.write_json_to_file('viz', json_filename)
    print('done')

    # creating the html page: read the JSON that was just written back into
    # a single string
    print('creating the html page...')
    network_data = ''
    # NOTE(review): `file` shadows the Python 2 builtin and is not closed in
    # the visible part of the script; the excerpt ends here, so confirm the
    # remainder closes it (or switch to `with open(...)`).
    file = open(json_filename, 'rt')
    for line in file:
        network_data += line
Exemplo n.º 13
0
The clustergrammer python module can be installed using pip:
pip install clustergrammer

or by getting the code from the repo:
https://github.com/MaayanLab/clustergrammer-py
'''

from clustergrammer import Network

net = Network()

# Load the tab-separated matrix.
net.load_file('../data_mats/df_predict_merge.txt')

# Fix the colors of the two virus values in row category 1, then run a first
# clustering pass so that net.viz['cat_colors'] is available to read below.
net.set_cat_color('row', 1, 'virus: chik', 'blue')
net.set_cat_color('row', 1, 'virus: zika', 'red')
net.cluster(enrichrgram=False)

# Transfer colors from the original to the predicted categories: each color
# stored under 'cat-1' is re-applied to row category 3 under the same name
# with 'original' replaced by 'predict'.
original_colors = net.viz['cat_colors']['row']['cat-1']
for inst_cat, inst_color in original_colors.items():
    new_cat = inst_cat.replace('original', 'predict')
    net.set_cat_color('row', 3, new_cat, inst_color)

# Cluster again now that the extra colors are set.
net.cluster(enrichrgram=False)

# write jsons for front-end visualizations
net.write_json_to_file('viz', 'json/mult_view.json', 'indent')
# net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent')
Exemplo n.º 14
0
def clust_from_response(response_list):
    """Build a clustered gene-by-term Network from enrichment results.

    ``response_list`` is converted with ``transfer_to_enr_dict`` into a list
    of dicts that are read through the keys ``'name'``, ``'combined_score'``,
    ``'pval'``, ``'zscore'`` and ``'int_genes'``. Terms with a positive
    combined score are scored three ways (combined score, ``-log(pval)`` and
    ``-zscore``), each normalized by its maximum and ranked in descending
    order. Terms ranked in the top 10 / 20 / 30 by at least one score are
    kept and turned into a binary gene x term matrix, which is clustered
    with the 'jaccard' distance type. One extra view per score type and list
    size is appended to ``net.viz['views']``.

    :param response_list: raw enrichment response accepted by
        ``transfer_to_enr_dict``.
    :return: the populated clustergrammer ``Network``.
    :raises ValueError: from ``math.log`` when a kept term has ``pval <= 0``.
    """
    from clustergrammer import Network
    import math
    import numpy as np
    import pandas as pd
    from copy import deepcopy

    ini_enr = transfer_to_enr_dict(response_list)

    score_types = ['combined_score', 'pval', 'zscore']

    # Collect raw scores per enriched term, keeping only terms whose combined
    # score is positive.
    enr = []
    raw_scores = {score_type: {} for score_type in score_types}
    for inst_enr in ini_enr:
        if not (inst_enr['combined_score'] > 0):
            continue
        inst_name = inst_enr['name']
        raw_scores['combined_score'][inst_name] = inst_enr['combined_score']
        # Negated so that larger is better for every score type.
        raw_scores['pval'][inst_name] = -math.log(inst_enr['pval'])
        raw_scores['zscore'][inst_name] = -inst_enr['zscore']
        enr.append(inst_enr)

    # Normalize by the maximum and sort descending. sort_values returns a new
    # Series; its result used to be discarded, so the "top" lists below were
    # in insertion order rather than score order.
    scores = {}
    for score_type in score_types:
        series = pd.Series(raw_scores[score_type], dtype=float)
        scores[score_type] = (series / series.max()).sort_values(
            ascending=False)

    number_of_enriched_terms = len(scores['combined_score'])

    # Only offer the list sizes that the number of terms can fill.
    if number_of_enriched_terms < 10:
        num_dict = {'ten': 10}
    elif number_of_enriched_terms < 20:
        num_dict = {'ten': 10, 'twenty': 20}
    else:
        num_dict = {'ten': 10, 'twenty': 20, 'thirty': 30}

    # top_terms[score_type][size_label] -> best-first list of term names
    top_terms = {}
    for score_type in score_types:
        ranked_names = scores[score_type].index.tolist()
        top_terms[score_type] = {
            num_label: ranked_names[:num_terms]
            for num_label, num_terms in num_dict.items()
        }

    # A term is kept when it appears in at least one of the top lists.
    keep_terms = set()
    for lists_by_size in top_terms.values():
        for term_list in lists_by_size.values():
            keep_terms.update(term_list)
    keep_enr = [inst_enr for inst_enr in enr if inst_enr['name'] in keep_terms]

    # fill in full matrix
    #######################

    # columns: enriched terms; rows: the genes intersecting any kept term
    col_node_names = [inst_enr['name'] for inst_enr in keep_enr]
    row_node_names = sorted(
        {gene for inst_enr in keep_enr for gene in inst_enr['int_genes']})

    net = Network()
    net.dat['nodes']['row'] = row_node_names
    net.dat['nodes']['col'] = col_node_names
    # numpy.zeros replaces scipy.zeros, a deprecated alias of the same function.
    net.dat['mat'] = np.zeros([len(row_node_names), len(col_node_names)])

    # Row names are unique, so a dict replaces repeated list.index scans.
    row_index_of = {gene: idx for idx, gene in enumerate(row_node_names)}

    for inst_enr in keep_enr:
        inst_term = inst_enr['name']
        col_index = col_node_names.index(inst_term)

        # use combined score for full matrix - will not be seen in viz
        net.dat['node_info']['col']['value'].append(
            scores['combined_score'][inst_term])

        for inst_gene in inst_enr['int_genes']:
            # save association
            net.dat['mat'][row_index_of[inst_gene], col_index] = 1

    # cluster full matrix
    #############################
    # do not make multiple views
    views = ['']

    # Clustering is switched off when there is at most one row.
    if len(net.dat['nodes']['row']) > 1:
        net.cluster(dist_type='jaccard', views=views, dendro=False)
    else:
        net.cluster(dist_type='jaccard',
                    views=views,
                    dendro=False,
                    run_clustering=False)

    # get dataframe from full matrix
    df = net.dat_to_df()

    # Same single-row rule for the per-score sub-networks; the loop below
    # does not touch net.dat, so the check is evaluated once.
    sub_cluster_kwargs = {'dist_type': 'jaccard', 'dendro': False}
    if not (len(net.dat['nodes']['row']) > 1):
        sub_cluster_kwargs['run_clustering'] = False

    for score_type in score_types:
        for num_label, num_terms in num_dict.items():
            ranked_terms = top_terms[score_type][num_label]

            # Restrict a copy of the full matrix to the top-term columns and
            # load it into a fresh network.
            inst_df = deepcopy(df)
            inst_df = inst_df[ranked_terms]
            inst_net = Network()
            inst_net.df_to_dat(inst_df)

            # make views
            inst_net.cluster(views=['N_row_sum'], **sub_cluster_kwargs)
            inst_views = inst_net.viz['views']

            # Tag each view with its list size and score type.
            for inst_view in inst_views:
                inst_view['N_col_sum'] = num_terms
                inst_view['enr_score_type'] = score_type

                # add values to col_nodes and order according to rank
                # (the best term gets the highest rank number)
                for inst_col in inst_view['nodes']['col_nodes']:
                    inst_name = inst_col['name']
                    inst_col['rank'] = (len(ranked_terms) -
                                        ranked_terms.index(inst_name))
                    inst_col['value'] = scores[score_type][inst_name]

            # add views to main network
            net.viz['views'].extend(inst_views)

    return net