def main(args):
    """Plot a heatmap of the wide dataset, with or without a dendrogram.

    Reads the input through ``wideToDesign``, drops missing data, and then
    writes either a hierarchical-cluster heatmap or a plain heatmap to
    ``args.fig``.

    Args:
        args: Parsed command-line arguments. The attributes read here are
            ``input``, ``design``, ``uniqID``, ``labels``, ``dendogram``
            and ``fig``.
    """
    # Load the data and discard missing values before plotting.
    dat = wideToDesign(args.input, args.design, args.uniqID, logger=logger)
    dat.dropMissing()

    # An axis keeps its labels unless its letter is listed in args.labels.
    show_x = "x" not in args.labels
    show_y = "y" not in args.labels
    print("x =", show_x)
    print("y =", show_y)

    logger.info("Plotting heatmaps")
    # Keyword arguments common to both plotting routines.
    plot_options = dict(cmap=palette.mpl_colormap, xlbls=show_x, ylbls=show_y)

    if args.dendogram == True:
        # Hierarchical cluster heatmap (HCH): the plotting call returns the
        # object that is saved directly as a PDF.
        figure = hm.plotHCHeatmap(dat.wide, hcheatmap=True, **plot_options)
        figure.savefig(args.fig, format="pdf")
    else:
        # Single heatmap drawn on the first axis of a figure handler.
        handler = figureHandler(proj='2d', figsize=(14, 14))
        hm.plotHeatmap(dat.wide, handler.ax[0], **plot_options)
        handler.formatAxis(xTitle="sampleID")
        handler.export(out=args.fig, dpi=300)

    logger.info("Script Complete!")
def nontechnical_analysis(args, df, mask, C, clustering):
    """Write the module table and draw three correlation heatmaps.

    Clusters are renumbered as "modules" in order of decreasing average
    degree, and the variables inside each module are ordered by decreasing
    degree. The resulting order is used for a tab-separated table written to
    ``args.out`` and for the re-ordered and smoothed heatmaps, which are
    saved together with the unsorted one in the PDF at ``args.figure``.

    Args:
        args: Parsed arguments; ``args.out`` and ``args.figure`` are read.
        df: Data frame whose index holds the row (variable) names.
        mask: Selector applied as ``df[mask]`` to obtain the rows that took
            part in the clustering.
        C: Correlation matrix of the clustered rows (indexed with
            ``np.ix_`` on both axes, so presumably square -- confirm with
            the caller).
        clustering: NumPy array with one cluster label per clustered row.
            Labels are used as ``0 .. clustering.max()``.

    Returns:
        None. The results are the table file and the PDF.
    """
    # Map each row name of the full data frame to its original position.
    row_index_map = {name: idx for idx, name in enumerate(df.index.values)}

    # Rows that are uninformative for clustering are masked out, so the
    # correlation matrix and the cluster vector can be smaller than df.
    remaining_row_names = df[mask].index.values

    # Number of clustered variables and number of clusters.
    p = clustering.shape[0]
    n_clusters = clustering.max() + 1

    # Modules and their members are ranked by absolute correlation.
    C_abs = np.abs(C)

    # Per-variable degree: mean absolute correlation with the other members
    # of the same cluster. Singleton clusters keep a degree of 0.
    selections = []
    degrees = np.zeros(p, dtype=float)
    for label in range(n_clusters):
        selection = np.flatnonzero(clustering == label)
        selections.append(selection)
        if selection.size > 1:
            submatrix = C_abs[np.ix_(selection, selection)]
            # Subtract 1 to drop the self-correlation on the diagonal
            # (assumes the diagonal of C is 1 -- TODO confirm).
            degrees[selection] = (
                (submatrix.sum(axis=0) - 1) / (selection.size - 1))

    # Modules are the clusters re-ordered by decreasing average degree.
    average_degrees = [degrees[selection].mean() for selection in selections]
    module_to_cluster = np.argsort(average_degrees)[::-1]
    cluster_to_module = {
        cluster: module for module, cluster in enumerate(module_to_cluster)
    }

    # Sort by module first, then by decreasing degree, then by the old index.
    ordering = sorted(
        (cluster_to_module[clustering[i]], -degrees[i], i) for i in range(p)
    )
    _, _, new_to_old_idx = zip(*ordering)

    # Write the tab-separated module table. The csv module needs a text-mode
    # file opened with newline=''; a binary handle raises TypeError on
    # Python 3 because the writer emits str.
    header = ('Gene', 'Module', 'Entry Index', 'Average Degree', 'Degree')
    with open(args.out, 'w', newline='') as fout:
        writer = csv.writer(fout, 'excel-tab')
        writer.writerow(header)
        for old_i in new_to_old_idx:
            name = remaining_row_names[old_i]
            cluster = clustering[old_i]
            writer.writerow((
                name,
                cluster_to_module[cluster] + 1,  # 1-based module number
                row_index_map[name] + 1,         # 1-based original row
                average_degrees[cluster],
                degrees[old_i],
            ))

    # One figure handler per heatmap.
    fh1 = figureHandler(proj="2d")
    fh2 = figureHandler(proj="2d")
    fh3 = figureHandler(proj="2d")

    # Correlations and cluster labels in the new order (second heatmap).
    C_sorted = C[np.ix_(new_to_old_idx, new_to_old_idx)]
    clustering_new = clustering[np.ix_(new_to_old_idx)]

    # Smoothed correlations (third heatmap): entries outside the
    # same-cluster blocks are replaced by the block average, entries inside
    # keep their sorted value. `expansion` is defined elsewhere in the file;
    # presumably it returns a variable-by-cluster indicator matrix -- confirm.
    S = expansion(clustering_new)
    block_mask = S.dot(S.T)
    denom = np.outer(S.sum(axis=0), S.sum(axis=0))
    small = S.T.dot(C_sorted).dot(S) / denom
    C_all_smoothed = S.dot(small).dot(S.T)
    C_smoothed = C_all_smoothed * (1 - block_mask) + C_sorted * block_mask

    # Axis labels for the re-ordered heatmaps.
    hpnames = [remaining_row_names[old_i] for old_i in new_to_old_idx]

    # First heatmap: correlations in the original order.
    hm.plotHeatmap(C, fh1.ax[0], cmap=palette.mpl_colormap,
                   xlbls=remaining_row_names, ylbls=remaining_row_names)
    fh1.formatAxis(xTitle="sampleID", figTitle="Correlations")

    # Second heatmap: correlations re-ordered according to the clustering.
    hm.plotHeatmap(C_sorted, fh2.ax[0], cmap=palette.mpl_colormap,
                   xlbls=hpnames, ylbls=hpnames)
    fh2.formatAxis(xTitle="sampleID", figTitle="Re-Ordered correlations")

    # Third heatmap: smoothed version of the re-ordered correlations.
    hm.plotHeatmap(C_smoothed, fh3.ax[0], cmap=palette.mpl_colormap,
                   xlbls=hpnames, ylbls=hpnames)
    fh3.formatAxis(xTitle="sampleID", figTitle="Smoothed correlations")

    # Collect the three figures in a single PDF.
    with PdfPages(args.figure) as pdf:
        fh1.addToPdf(pdf)
        fh2.addToPdf(pdf)
        fh3.addToPdf(pdf)