def pct_rows(net, df, all_views, dist_type, rank_type):
    '''
    Append clustergram views produced by filtering rows at increasing
    percentage cutoffs of the maximum row sum.

    For each fraction f in 0.0, 0.1, ..., 0.9, rows whose sum falls below
    f * max(row sums) are removed, the remaining matrix is re-clustered,
    and the resulting row/col node layouts are appended to ``all_views``.

    Parameters
    ----------
    net : Network
        Source network; used only as the template data (never mutated).
    df : dict
        DataFrame wrapper with key 'mat' (numeric matrix of the data).
    all_views : list
        Accumulator of view dicts; extended in place and also returned.
    dist_type : str
        Distance metric passed through to calc_clust.
    rank_type : str
        Label suffix for the view key ('sum', 'var', ...).

    Returns
    -------
    list
        ``all_views`` with up to 10 new view dicts appended.
    '''
    from __init__ import Network
    from copy import deepcopy
    import numpy as np
    import calc_clust
    import run_filter

    # fractional cutoffs: 0.0, 0.1, ..., 0.9 of the maximum row sum
    all_filt = [i / 10.0 for i in range(10)]

    mat = deepcopy(df['mat'])
    sum_row = np.sum(mat, axis=1)
    max_sum = max(sum_row)

    for inst_filt in all_filt:
        cutoff = inst_filt * max_sum

        inst_df = run_filter.df_filter_row(deepcopy(df), cutoff,
                                           take_abs=False)

        tmp_net = Network()
        tmp_net.df_to_dat(inst_df)

        try:
            try:
                calc_clust.cluster_row_and_col(tmp_net,
                                               dist_type=dist_type,
                                               run_clustering=True)
            except Exception:
                # clustering can fail on heavily filtered matrices;
                # fall back to ordering without clustering
                calc_clust.cluster_row_and_col(tmp_net,
                                               dist_type=dist_type,
                                               run_clustering=False)

            inst_view = {
                'pct_row_' + rank_type: inst_filt,
                # NOTE(review): 'cos' is hard-coded even though dist_type
                # is configurable — preserved for compatibility; confirm
                # whether this should be dist_type instead.
                'dist': 'cos',
                'nodes': {
                    'row_nodes': tmp_net.viz['row_nodes'],
                    'col_nodes': tmp_net.viz['col_nodes'],
                },
            }
            all_views.append(inst_view)
        except Exception:
            # best-effort: skip cutoffs where even the fallback fails
            pass

    return all_views
def make_clust(net, dist_type='cosine', run_clustering=True, dendro=True,
               requested_views=('pct_row_sum', 'N_row_sum'),
               linkage_type='average', sim_mat=False):
    '''
    Calculate multiple views of a clustergram by filtering the data and
    clustering after each filtering. The filtering keeps the top N rows
    (or top percentage of rows) based on some quantity (sum, variance, etc).

    Parameters
    ----------
    net : Network
        Network object whose data is clustered; its ``viz['views']`` is
        set as a side effect.
    dist_type : str
        Distance metric used for clustering.
    run_clustering : bool
        Whether to run hierarchical clustering (vs. ordering only).
    dendro : bool
        Whether to compute dendrogram information.
    requested_views : sequence of str
        Which filtered-view families to compute. A tuple default is used
        deliberately: a mutable (list) default argument is shared across
        calls and is a classic Python pitfall.
    linkage_type : str
        Linkage method for hierarchical clustering.
    sim_mat : bool
        Placeholder flag for similarity-matrix generation (not yet
        implemented; currently only prints a notice).
    '''
    from copy import deepcopy
    import calc_clust
    import run_filter
    import make_views

    df = net.dat_to_df()

    # drop rows/cols whose values are all essentially zero before the
    # initial (unfiltered) clustering
    threshold = 0.0001
    df = run_filter.df_filter_row(df, threshold)
    df = run_filter.df_filter_col(df, threshold)

    # calculate initial view with no row filtering
    net.df_to_dat(df)

    calc_clust.cluster_row_and_col(net, dist_type=dist_type,
                                   linkage_type=linkage_type,
                                   run_clustering=run_clustering,
                                   dendro=dendro, ignore_cat=False)

    all_views = []
    send_df = deepcopy(df)

    if 'N_row_sum' in requested_views:
        all_views = make_views.N_rows(net, send_df, all_views,
                                      dist_type=dist_type, rank_type='sum')

    if 'N_row_var' in requested_views:
        all_views = make_views.N_rows(net, send_df, all_views,
                                      dist_type=dist_type, rank_type='var')

    if 'pct_row_sum' in requested_views:
        all_views = make_views.pct_rows(net, send_df, all_views,
                                        dist_type=dist_type, rank_type='sum')

    if 'pct_row_var' in requested_views:
        all_views = make_views.pct_rows(net, send_df, all_views,
                                        dist_type=dist_type, rank_type='var')

    if sim_mat is True:
        # TODO: similarity-matrix generation is not implemented yet
        print(
            'make similarity matrices of rows and columns, add to viz data structure'
        )

    net.viz['views'] = all_views
def make_clust(net, dist_type='cosine', run_clustering=True, dendro=True,
               requested_views=('pct_row_sum', 'N_row_sum'),
               linkage_type='average', sim_mat=False):
    '''
    Calculate multiple views of a clustergram by filtering the data and
    clustering after each filtering. The filtering keeps the top N rows
    (or top percentage of rows) based on some quantity (sum, variance, etc).

    NOTE(review): this is a duplicate definition of ``make_clust`` in the
    same module; at import time it shadows the earlier one. Confirm which
    copy is intended and remove the other.

    Parameters
    ----------
    net : Network
        Network object whose data is clustered; its ``viz['views']`` is
        set as a side effect.
    dist_type : str
        Distance metric used for clustering.
    run_clustering : bool
        Whether to run hierarchical clustering (vs. ordering only).
    dendro : bool
        Whether to compute dendrogram information.
    requested_views : sequence of str
        Which filtered-view families to compute. A tuple default is used
        deliberately: a mutable (list) default argument is shared across
        calls and is a classic Python pitfall.
    linkage_type : str
        Linkage method for hierarchical clustering.
    sim_mat : bool
        Placeholder flag for similarity-matrix generation (not yet
        implemented; currently only prints a notice).
    '''
    from copy import deepcopy
    import calc_clust
    import run_filter
    import make_views

    df = net.dat_to_df()

    # drop rows/cols whose values are all essentially zero before the
    # initial (unfiltered) clustering
    threshold = 0.0001
    df = run_filter.df_filter_row(df, threshold)
    df = run_filter.df_filter_col(df, threshold)

    # calculate initial view with no row filtering
    net.df_to_dat(df)

    calc_clust.cluster_row_and_col(net, dist_type=dist_type,
                                   linkage_type=linkage_type,
                                   run_clustering=run_clustering,
                                   dendro=dendro, ignore_cat=False)

    all_views = []
    send_df = deepcopy(df)

    if 'N_row_sum' in requested_views:
        all_views = make_views.N_rows(net, send_df, all_views,
                                      dist_type=dist_type, rank_type='sum')

    if 'N_row_var' in requested_views:
        all_views = make_views.N_rows(net, send_df, all_views,
                                      dist_type=dist_type, rank_type='var')

    if 'pct_row_sum' in requested_views:
        all_views = make_views.pct_rows(net, send_df, all_views,
                                        dist_type=dist_type, rank_type='sum')

    if 'pct_row_var' in requested_views:
        all_views = make_views.pct_rows(net, send_df, all_views,
                                        dist_type=dist_type, rank_type='var')

    if sim_mat is True:
        # TODO: similarity-matrix generation is not implemented yet
        print('make similarity matrices of rows and columns, add to viz data structure')

    net.viz['views'] = all_views