# Exemplo n.º 1
# 0
def make_clust(net,
               dist_type='cosine',
               run_clustering=True,
               dendro=True,
               requested_views=None,
               linkage_type='average',
               sim_mat=False):
    '''Calculate multiple views of a clustergram by filtering the data and
    clustering after each filtering step. Each filtering keeps the top N
    rows based on some quantity (sum, variance, etc).

    Parameters
    ----------
    net : Network
        Network instance holding the clustergram data; its viz['views']
        is overwritten with the computed views (side effect).
    dist_type : str
        Distance metric passed to the clustering routines.
    run_clustering : bool
        Whether hierarchical clustering is actually run.
    dendro : bool
        Whether dendrogram information is computed.
    requested_views : list of str or None
        Which filtered views to compute. None (the default) means
        ['pct_row_sum', 'N_row_sum']; None is used instead of a list
        literal to avoid the shared mutable-default-argument pitfall.
    linkage_type : str
        Linkage method for hierarchical clustering.
    sim_mat : bool
        Placeholder flag for adding row/col similarity matrices.
    '''

    from copy import deepcopy
    import calc_clust
    import run_filter
    import make_views

    if requested_views is None:
        requested_views = ['pct_row_sum', 'N_row_sum']

    df = net.dat_to_df()

    # drop rows and columns whose values all fall below this threshold
    threshold = 0.0001
    df = run_filter.df_filter_row(df, threshold)
    df = run_filter.df_filter_col(df, threshold)

    # calculate initial view with no row filtering
    net.df_to_dat(df)

    calc_clust.cluster_row_and_col(net,
                                   dist_type=dist_type,
                                   linkage_type=linkage_type,
                                   run_clustering=run_clustering,
                                   dendro=dendro,
                                   ignore_cat=False)

    all_views = []
    # pass a copy so the view builders cannot mutate the filtered df
    send_df = deepcopy(df)

    if 'N_row_sum' in requested_views:
        all_views = make_views.N_rows(net,
                                      send_df,
                                      all_views,
                                      dist_type=dist_type,
                                      rank_type='sum')

    if 'N_row_var' in requested_views:
        all_views = make_views.N_rows(net,
                                      send_df,
                                      all_views,
                                      dist_type=dist_type,
                                      rank_type='var')

    if 'pct_row_sum' in requested_views:
        all_views = make_views.pct_rows(net,
                                        send_df,
                                        all_views,
                                        dist_type=dist_type,
                                        rank_type='sum')

    if 'pct_row_var' in requested_views:
        all_views = make_views.pct_rows(net,
                                        send_df,
                                        all_views,
                                        dist_type=dist_type,
                                        rank_type='var')

    if sim_mat:
        # NOTE(review): not implemented — similarity matrices are not yet
        # added to the viz data structure.
        print(
            'make similarity matrices of rows and columns, add to viz data structure'
        )

    net.viz['views'] = all_views
# Exemplo n.º 2
# 0
def N_rows(net, df, all_views, dist_type='cosine', rank_type='sum'):
  '''Append clustergram views that keep only the top-N rows of df.

  Rows are ranked by the absolute value of their sum or variance
  (rank_type 'sum' or 'var'); one view is built per cutoff in keep_top,
  plus an unfiltered 'all' view.  Views that fail to cluster are skipped.

  Returns the all_views list with the new views appended.
  '''
  from copy import deepcopy
  from __init__ import Network
  import calc_clust
  import run_filter

  keep_top = ['all', 500, 400, 300, 200, 100, 80, 60, 40, 20, 10]

  # rank rows by |sum| or |variance|, computed column-wise on the transpose
  df_abs = deepcopy(df['mat'])
  df_abs = df_abs.transpose()

  if rank_type == 'sum':
    tmp_sum = df_abs.sum(axis=0)
  elif rank_type == 'var':
    tmp_sum = df_abs.var(axis=0)
  else:
    # previously an invalid rank_type caused a NameError further down
    raise ValueError("rank_type must be 'sum' or 'var'")

  tmp_sum = tmp_sum.abs()
  tmp_sum.sort_values(inplace=True, ascending=False)
  rows_sorted = tmp_sum.index.values.tolist()

  for inst_keep in keep_top:

    tmp_df = deepcopy(df)

    # test 'all' first: in Python 3, 'all' < len(...) raises TypeError,
    # so the original operand order crashed on the 'all' entry
    if inst_keep == 'all' or inst_keep < len(rows_sorted):

      tmp_net = deepcopy(Network())

      if inst_keep != 'all':

        keep_rows = rows_sorted[0:inst_keep]
        # .loc replaces the deprecated .ix indexer (removed in pandas 1.0)
        tmp_df['mat'] = tmp_df['mat'].loc[keep_rows]

        if 'mat_up' in tmp_df:
          tmp_df['mat_up'] = tmp_df['mat_up'].loc[keep_rows]
          tmp_df['mat_dn'] = tmp_df['mat_dn'].loc[keep_rows]

        tmp_df = run_filter.df_filter_col(tmp_df, 0.001)
        tmp_net.df_to_dat(tmp_df)

      else:
        tmp_net.df_to_dat(tmp_df)

      try:
        try:
          calc_clust.cluster_row_and_col(tmp_net, dist_type, run_clustering=True)
        except Exception:
          # fall back to ordering without clustering if clustering fails
          calc_clust.cluster_row_and_col(tmp_net, dist_type, run_clustering=False)

        # add view
        inst_view = {}
        inst_view['N_row_' + rank_type] = inst_keep
        inst_view['dist'] = 'cos'
        inst_view['nodes'] = {}
        inst_view['nodes']['row_nodes'] = tmp_net.viz['row_nodes']
        inst_view['nodes']['col_nodes'] = tmp_net.viz['col_nodes']
        all_views.append(inst_view)

      except Exception:
        # view could not be built at this cutoff; skip it rather than
        # aborting all remaining views
        pass

  return all_views
def make_clust(net, dist_type='cosine', run_clustering=True,
                          dendro=True, requested_views=None,
                          linkage_type='average', sim_mat=False):

  '''Calculate multiple views of a clustergram by filtering the data and
  clustering after each filtering step. Each filtering keeps the top N
  rows based on some quantity (sum, variance, etc).

  requested_views defaults to ['pct_row_sum', 'N_row_sum']; None is used
  as the default instead of a list literal to avoid the shared
  mutable-default-argument pitfall.  The computed views are stored in
  net.viz['views'] (side effect).
  '''

  from copy import deepcopy
  import calc_clust
  import run_filter
  import make_views

  if requested_views is None:
    requested_views = ['pct_row_sum', 'N_row_sum']

  df = net.dat_to_df()

  # drop rows and columns whose values all fall below this threshold
  threshold = 0.0001
  df = run_filter.df_filter_row(df, threshold)
  df = run_filter.df_filter_col(df, threshold)

  # calculate initial view with no row filtering
  net.df_to_dat(df)

  calc_clust.cluster_row_and_col(net, dist_type=dist_type,
                                linkage_type=linkage_type,
                                run_clustering=run_clustering,
                                dendro=dendro, ignore_cat=False)

  all_views = []
  # pass a copy so the view builders cannot mutate the filtered df
  send_df = deepcopy(df)

  if 'N_row_sum' in requested_views:
    all_views = make_views.N_rows(net, send_df, all_views,
                                  dist_type=dist_type, rank_type='sum')

  if 'N_row_var' in requested_views:
    all_views = make_views.N_rows(net, send_df, all_views,
                                  dist_type=dist_type, rank_type='var')

  if 'pct_row_sum' in requested_views:
    all_views = make_views.pct_rows(net, send_df, all_views,
                                    dist_type=dist_type, rank_type='sum')

  if 'pct_row_var' in requested_views:
    all_views = make_views.pct_rows(net, send_df, all_views,
                                    dist_type=dist_type, rank_type='var')

  if sim_mat:
    # NOTE(review): not implemented — similarity matrices are not yet
    # added to the viz data structure.
    print('make similarity matrices of rows and columns, add to viz data structure')

  net.viz['views'] = all_views
# Exemplo n.º 4
# 0
def N_rows(net, df, all_views, dist_type='cosine', rank_type='sum'):
    '''Append clustergram views that keep only the top-N rows of df.

    Rows are ranked by the absolute value of their sum or variance
    (rank_type 'sum' or 'var'); one view is built per cutoff in keep_top,
    plus an unfiltered 'all' view.  Views that fail to cluster are skipped.

    Returns the all_views list with the new views appended.
    '''
    from copy import deepcopy
    from __init__ import Network
    import calc_clust
    import run_filter

    keep_top = ['all', 500, 400, 300, 200, 100, 80, 60, 40, 20, 10]

    # rank rows by |sum| or |variance|, computed column-wise on the transpose
    df_abs = deepcopy(df['mat'])
    df_abs = df_abs.transpose()

    if rank_type == 'sum':
        tmp_sum = df_abs.sum(axis=0)
    elif rank_type == 'var':
        tmp_sum = df_abs.var(axis=0)
    else:
        # previously an invalid rank_type caused a NameError further down
        raise ValueError("rank_type must be 'sum' or 'var'")

    tmp_sum = tmp_sum.abs()
    tmp_sum.sort_values(inplace=True, ascending=False)
    rows_sorted = tmp_sum.index.values.tolist()

    for inst_keep in keep_top:

        tmp_df = deepcopy(df)

        # test 'all' first: in Python 3, 'all' < len(...) raises TypeError,
        # so the original operand order crashed on the 'all' entry
        if inst_keep == 'all' or inst_keep < len(rows_sorted):

            tmp_net = deepcopy(Network())

            if inst_keep != 'all':

                keep_rows = rows_sorted[0:inst_keep]
                # .loc replaces the deprecated .ix indexer (removed in pandas 1.0)
                tmp_df['mat'] = tmp_df['mat'].loc[keep_rows]

                if 'mat_up' in tmp_df:
                    tmp_df['mat_up'] = tmp_df['mat_up'].loc[keep_rows]
                    tmp_df['mat_dn'] = tmp_df['mat_dn'].loc[keep_rows]

                tmp_df = run_filter.df_filter_col(tmp_df, 0.001)
                tmp_net.df_to_dat(tmp_df)

            else:
                tmp_net.df_to_dat(tmp_df)

            try:
                try:
                    calc_clust.cluster_row_and_col(tmp_net,
                                                   dist_type,
                                                   run_clustering=True)
                except Exception:
                    # fall back to ordering without clustering on failure
                    calc_clust.cluster_row_and_col(tmp_net,
                                                   dist_type,
                                                   run_clustering=False)

                # add view
                inst_view = {}
                inst_view['N_row_' + rank_type] = inst_keep
                inst_view['dist'] = 'cos'
                inst_view['nodes'] = {}
                inst_view['nodes']['row_nodes'] = tmp_net.viz['row_nodes']
                inst_view['nodes']['col_nodes'] = tmp_net.viz['col_nodes']
                all_views.append(inst_view)

            except Exception:
                # view could not be built at this cutoff; skip it rather
                # than aborting all remaining views
                pass

    return all_views