def eig_correlate_matrices(hic_data1, hic_data2, nvect=6, normalized=False, savefig=None, show=False, savedata=None, remove_bad_columns=True, **kwargs): """ Compare the iteractions of two Hi-C matrices using their 6 first eigenvectors, with pearson correlation :param hic_data1: Hi-C-data object :param hic_data2: Hi-C-data object :param 6 nvect: number of eigenvectors to compare :param None savefig: path to save the plot :param False show: displays the plot :param False normalized: use normalized data :param True remove_bads: computes the union of bad columns between samples and exclude them from the comparison :param kwargs: any argument to pass to matplotlib imshow function :returns: matrix of correlations """ data1 = hic_data1.get_matrix(normalized=normalized) data2 = hic_data2.get_matrix(normalized=normalized) ## reduce matrices to remove bad columns if remove_bad_columns: # union of bad columns bads = hic_data1.bads.copy() bads.update(hic_data2.bads) # remove them form both matrices for bad in sorted(bads, reverse=True): del(data1[bad]) del(data2[bad]) for i in xrange(len(data1)): _ = data1[i].pop(bad) _ = data2[i].pop(bad) # get the log size = len(data1) data1 = nozero_log(data1, np.log2) data2 = nozero_log(data2, np.log2) # get the eigenvectors ev1, evect1 = eigh(data1) ev2, evect2 = eigh(data2) corr = [[0 for _ in xrange(nvect)] for _ in xrange(nvect)] # sort eigenvectors according to their eigenvalues => first is last!! sort_perm = ev1.argsort() ev1.sort() evect1 = evect1[sort_perm] sort_perm = ev2.argsort() ev2.sort() evect2 = evect2[sort_perm] # calculate Pearson correlation for i in xrange(nvect): for j in xrange(nvect): corr[i][j] = abs(pearsonr(evect1[:,-i-1], evect2[:,-j-1])[0]) # plot axe = plt.axes([0.1, 0.1, 0.6, 0.8]) cbaxes = plt.axes([0.85, 0.1, 0.03, 0.8]) if show or savefig: im = axe.imshow(corr, interpolation="nearest",origin='lower', **kwargs) axe.set_xlabel('Eigen Vectors exp. 1') axe.set_ylabel('Eigen Vectors exp. 2') axe.set_xticks(range(nvect)) axe.set_yticks(range(nvect)) axe.set_xticklabels(range(1, nvect + 2)) axe.set_yticklabels(range(1, nvect + 2)) axe.xaxis.set_tick_params(length=0, width=0) axe.yaxis.set_tick_params(length=0, width=0) cbar = plt.colorbar(im, cax = cbaxes ) cbar.ax.set_ylabel('Pearson correlation', rotation=90*3, verticalalignment='bottom') axe2 = axe.twinx() axe2.set_yticks(range(nvect)) axe2.set_yticklabels(['%.1f' % (e) for e in ev2[-nvect:][::-1]]) axe2.set_ylabel('corresponding Eigen Values exp. 2', rotation=90*3, verticalalignment='bottom') axe2.set_ylim((-0.5, nvect - 0.5)) axe2.yaxis.set_tick_params(length=0, width=0) axe3 = axe.twiny() axe3.set_xticks(range(nvect)) axe3.set_xticklabels(['%.1f' % (e) for e in ev1[-nvect:][::-1]]) axe3.set_xlabel('corresponding Eigen Values exp. 1') axe3.set_xlim((-0.5, nvect - 0.5)) axe3.xaxis.set_tick_params(length=0, width=0) axe.set_ylim((-0.5, nvect - 0.5)) axe.set_xlim((-0.5, nvect - 0.5)) if savefig: tadbit_savefig(savefig) if show: plt.show() plt.close('all') if savedata: out = open(savedata, 'w') out.write('# ' + '\t'.join(['Eigen Vector %s'% i for i in xrange(nvect)]) + '\n') for i in xrange(nvect): out.write('\t'.join([str(corr[i][j]) for j in xrange(nvect)]) + '\n') out.close() if kwargs.get('get_bads', False): return corr, bads else: return corr
def draw_map(data, genome_seq, cumcs, savefig, show, one=False, clim=None, cmap='jet', decay=False, perc=10, name=None, cistrans=None, decay_resolution=10000, normalized=False, max_diff=None): _ = plt.figure(figsize=(15.,12.5)) if not max_diff: max_diff = len(data) ax1 = plt.axes([0.34, 0.08, 0.6, 0.7205]) ax2 = plt.axes([0.07, 0.65, 0.21, 0.15]) if decay: ax3 = plt.axes([0.07, 0.42, 0.21, 0.15]) plot_distance_vs_interactions(data, genome_seq=genome_seq, axe=ax3, resolution=decay_resolution, max_diff=max_diff, normalized=normalized) ax4 = plt.axes([0.34, 0.805, 0.6, 0.04], sharex=ax1) ax5 = plt.axes([0.34, 0.845, 0.6, 0.04], sharex=ax1) ax6 = plt.axes([0.34, 0.885, 0.6, 0.04], sharex=ax1) try: minoridata = np.nanmin(data) maxoridata = np.nanmax(data) except AttributeError: vals = [i for d in data for i in d if not np.isnan(i)] minoridata = np.min(vals) maxoridata = np.max(vals) totaloridata = np.nansum([data[i][j] for i in xrange(len(data)) for j in xrange(i, len(data[i]))]) # may not be square data = nozero_log(data, np.log2) vals = np.array([i for d in data for i in d]) vals = vals[np.isfinite(vals)] mindata = np.nanmin(vals) maxdata = np.nanmax(vals) diff = maxdata - mindata posI = 0.01 if not clim else (float(clim[0]) / diff) if clim[0] != None else 0.01 posF = 1.0 if not clim else (float(clim[1]) / diff) if clim[1] != None else 1.0 if cmap == 'tadbit': cuts = perc cdict = {'red' : [(0.0, 0.0, 0.0)], 'green': [(0.0, 0.0, 0.0)], 'blue' : [(0.0, 0.5, 0.5)]} prev_pos = 0 median = (np.median(vals) - mindata) / diff for prc in np.linspace(posI, median, cuts / 2, endpoint=False): try: pos = (np.percentile(vals, prc * 100.) - mindata) / diff prc = ((prc - posI) / (median - posI)) + 1. / cuts except ValueError: pos = prc = 0 if prev_pos >= pos: continue cdict['red' ].append([pos, prc, prc]) cdict['green'].append([pos, prc, prc]) cdict['blue' ].append([pos, 1, 1]) prev_pos = pos for prc in np.linspace(median + 1. / cuts, posF, cuts / 2, endpoint=False): try: pos = (np.percentile(vals, prc * 100.) - mindata) / diff prc = ((prc - median) / (posF - median)) except ValueError: pos = prc = 0 if prev_pos >= pos: continue cdict['red' ].append([pos, 1.0, 1.0]) cdict['green'].append([pos, 1 - prc, 1 - prc]) cdict['blue' ].append([pos, 1 - prc, 1 - prc]) prev_pos = pos pos = (np.percentile(vals ,97.) - mindata) / diff cdict['red' ].append([pos, 0.1, 0.1]) cdict['green'].append([pos, 0, 0]) cdict['blue' ].append([pos, 0, 0]) cdict['red' ].append([1.0, 1, 1]) cdict['green'].append([1.0, 1, 1]) cdict['blue' ].append([1.0, 0, 0]) cmap = LinearSegmentedColormap(cmap, cdict) clim = None else: cmap = plt.get_cmap(cmap) cmap.set_bad('darkgrey', 1) ax1.imshow(data, interpolation='none', cmap=cmap, vmin=clim[0] if clim else None, vmax=clim[1] if clim else None) size1 = len(data) size2 = len(data[0]) if size1 == size2: for i in xrange(size1): for j in xrange(i, size2): if np.isnan(data[i][j]): data[i][j] = 0 data[j][i] = 0 else: for i in xrange(size1): for j in xrange(size2): if np.isnan(data[i][j]): data[i][j] = 0 #data[j][i] = data[i][j] try: evals, evect = eigh(data) sort_perm = evals.argsort() evect = evect[sort_perm] except: evals, evect = None, None data = [i for d in data for i in d if not np.isnan(i)] gradient = np.linspace(np.nanmin(data), np.nanmax(data), max(size1, size2)) gradient = np.vstack((gradient, gradient)) h = ax2.hist(data, color='darkgrey', linewidth=2, bins=20, histtype='step', normed=True) _ = ax2.imshow(gradient, aspect='auto', cmap=cmap, extent=(np.nanmin(data), np.nanmax(data) , 0, max(h[0]))) if genome_seq: for crm in genome_seq: ax1.vlines([cumcs[crm][0]-.5, cumcs[crm][1]-.5], cumcs[crm][0]-.5, cumcs[crm][1]-.5, color='w', linestyle='-', linewidth=1, alpha=1) ax1.hlines([cumcs[crm][1]-.5, cumcs[crm][0]-.5], cumcs[crm][0]-.5, cumcs[crm][1]-.5, color='w', linestyle='-', linewidth=1, alpha=1) ax1.vlines([cumcs[crm][0]-.5, cumcs[crm][1]-.5], cumcs[crm][0]-.5, cumcs[crm][1]-.5, color='k', linestyle='--') ax1.hlines([cumcs[crm][1]-.5, cumcs[crm][0]-.5], cumcs[crm][0]-.5, cumcs[crm][1]-.5, color='k', linestyle='--') if not one: vals = [0] keys = [''] for crm in genome_seq: vals.append(cumcs[crm][0]) keys.append(crm) vals.append(cumcs[crm][1]) ax1.set_yticks(vals) ax1.set_yticklabels('') ax1.set_yticks([float(vals[i]+vals[i+1])/2 for i in xrange(len(vals) - 1)], minor=True) ax1.set_yticklabels(keys, minor=True) for t in ax1.yaxis.get_minor_ticks(): t.tick1On = False t.tick2On = False # totaloridata = ''.join([j + ('' if (i+1)%3 else ',') for i, j in enumerate(str(totaloridata)[::-1])])[::-1].strip(',') # minoridata = ''.join([j + ('' if (i+1)%3 else ',') for i, j in enumerate(str(minoridata)[::-1])])[::-1].strip(',') # maxoridata = ''.join([j + ('' if (i+1)%3 else ',') for i, j in enumerate(str(maxoridata)[::-1])])[::-1].strip(',') plt.figtext(0.05,0.25, ''.join([ (name + '\n') if name else '', 'Number of interactions: %s\n' % str(totaloridata), ('' if np.isnan(cistrans) else ('Percentage of cis interactions: %.0f%%\n' % (cistrans*100))), 'Min interactions: %s\n' % (minoridata), 'Max interactions: %s\n' % (maxoridata)])) ax2.set_xlim((np.nanmin(data), np.nanmax(data))) ax2.set_ylim((0, max(h[0]))) ax1.set_xlim ((-0.5, size1 - .5)) ax1.set_ylim ((-0.5, size2 - .5)) ax2.set_xlabel('log interaction count') # we reduce the number of dots displayed.... we just want to see the shape subdata = np.array(list(set([float(int(d*100))/100 for d in data]))) try: normfit = sc_norm.pdf(subdata, np.nanmean(data), np.nanstd(data)) except AttributeError: normfit = sc_norm.pdf(subdata, np.mean(data), np.std(data)) ax2.plot(subdata, normfit, 'w.', markersize=2.5, alpha=.4) ax2.plot(subdata, normfit, 'k.', markersize=1.5, alpha=1) ax2.set_title('skew: %.3f, kurtosis: %.3f' % (skew(data), kurtosis(data))) try: ax4.vlines(range(size1), 0, evect[:,-1], color='k') except (TypeError, IndexError): pass ax4.hlines(0, 0, size2, color='red') ax4.set_ylabel('E1') ax4.set_yticklabels([]) try: ax5.vlines(range(size1), 0, evect[:,-2], color='k') except (TypeError, IndexError): pass ax5.hlines(0, 0, size2, color='red') ax5.set_ylabel('E2') ax5.set_yticklabels([]) try: ax6.vlines(range(size1), 0, evect[:,-3], color='k') except (TypeError, IndexError): pass ax6.hlines(0, 0, size2, color='red') ax6.set_ylabel('E3') ax6.set_yticklabels([]) xticklabels = ax4.get_xticklabels() + ax5.get_xticklabels() + ax6.get_xticklabels() plt.setp(xticklabels, visible=False) if savefig: tadbit_savefig(savefig) elif show: plt.show() plt.close('all')
def eig_correlate_matrices(hic_data1, hic_data2, nvect=6, savefig=None, show=False, savedata=None, **kwargs): """ Compare the iteractions of two Hi-C matrices using their 6 first eigenvectors, with spearman rank correlation :param hic_data1: Hi-C-data object :param hic_data2: Hi-C-data object :param 6 nvect: number of eigenvectors to compare :param None savefig: path to save the plot :param False show: displays the plot :param kwargs: any argument to pass to matplotlib imshow function :returns: matrix of correlations """ data1 = hic_data1.get_matrix() data2 = hic_data2.get_matrix() # get the log size = len(data1) data1 = nozero_log(data1, np.log2) data2 = nozero_log(data2, np.log2) # get the eigenvectors ev1, evect1 = eigh(data1) ev2, evect2 = eigh(data2) corr = [[0 for _ in xrange(nvect)] for _ in xrange(nvect)] # sort eigenvectors according to their eigenvalues => first is last!! sort_perm = ev1.argsort() ev1.sort() evect1 = evect1[sort_perm] sort_perm = ev2.argsort() ev2.sort() evect2 = evect2[sort_perm] # calculate Pearson correlation for i in xrange(nvect): for j in xrange(nvect): corr[i][j] = abs(pearsonr(evect1[:,-i-1], evect2[:,-j-1])[0]) # plot axe = plt.axes([0.1, 0.1, 0.6, 0.8]) cbaxes = plt.axes([0.85, 0.1, 0.03, 0.8]) if show or savefig: im = axe.imshow(corr, interpolation="nearest",origin='lower', **kwargs) axe.set_xlabel('Eigen Vectors exp. 1') axe.set_ylabel('Eigen Vectors exp. 2') axe.set_xticks(range(nvect)) axe.set_yticks(range(nvect)) axe.set_xticklabels(range(1, nvect + 2)) axe.set_yticklabels(range(1, nvect + 2)) axe.xaxis.set_tick_params(length=0, width=0) axe.yaxis.set_tick_params(length=0, width=0) cbar = plt.colorbar(im, cax = cbaxes ) cbar.ax.set_ylabel('Pearson correlation', rotation=90*3, verticalalignment='bottom') axe2 = axe.twinx() axe2.set_yticks(range(nvect)) axe2.set_yticklabels(['%.1f' % (e) for e in ev2[-nvect:][::-1]]) axe2.set_ylabel('corresponding Eigen Values exp. 2', rotation=90*3, verticalalignment='bottom') axe2.set_ylim((-0.5, nvect - 0.5)) axe2.yaxis.set_tick_params(length=0, width=0) axe3 = axe.twiny() axe3.set_xticks(range(nvect)) axe3.set_xticklabels(['%.1f' % (e) for e in ev1[-nvect:][::-1]]) axe3.set_xlabel('corresponding Eigen Values exp. 1') axe3.set_xlim((-0.5, nvect - 0.5)) axe3.xaxis.set_tick_params(length=0, width=0) axe.set_ylim((-0.5, nvect - 0.5)) axe.set_xlim((-0.5, nvect - 0.5)) if savefig: tadbit_savefig(savefig) if show: plt.show() plt.close('all') if savedata: out = open(savedata, 'w') out.write('# ' + '\t'.join(['Eigen Vector %s'% i for i in xrange(nvect)]) + '\n') for i in xrange(nvect): out.write('\t'.join([str(corr[i][j]) for j in xrange(nvect)]) + '\n') out.close() return corr
def draw_map(data, genome_seq, cumcs, savefig, show, one=False, clim=None, cmap='jet', decay=False, perc=10, name=None, cistrans=None, decay_resolution=10000, normalized=False, max_diff=None): _ = plt.figure(figsize=(15.,12.5)) if not max_diff: max_diff = len(data) ax1 = plt.axes([0.34, 0.08, 0.6, 0.7205]) ax2 = plt.axes([0.07, 0.65, 0.21, 0.15]) if decay: ax3 = plt.axes([0.07, 0.42, 0.21, 0.15]) plot_distance_vs_interactions(data, genome_seq=genome_seq, axe=ax3, resolution=decay_resolution, max_diff=max_diff, normalized=normalized) ax4 = plt.axes([0.34, 0.805, 0.6, 0.04], sharex=ax1) ax5 = plt.axes([0.34, 0.845, 0.6, 0.04], sharex=ax1) ax6 = plt.axes([0.34, 0.885, 0.6, 0.04], sharex=ax1) try: minoridata = np.nanmin(data) maxoridata = np.nanmax(data) except AttributeError: vals = [i for d in data for i in d if not np.isnan(i)] minoridata = np.min(vals) maxoridata = np.max(vals) totaloridata = np.nansum([data[i][j] for i in xrange(len(data)) for j in xrange(i, len(data))]) data = nozero_log(data, np.log2) vals = np.array([i for d in data for i in d]) vals = vals[np.isfinite(vals)] mindata = np.nanmin(vals) maxdata = np.nanmax(vals) diff = maxdata - mindata posI = 0.01 if not clim else (float(clim[0]) / diff) if clim[0] != None else 0.01 posF = 1.0 if not clim else (float(clim[1]) / diff) if clim[1] != None else 1.0 if cmap == 'tadbit': cuts = perc cdict = {'red' : [(0.0, 0.0, 0.0)], 'green': [(0.0, 0.0, 0.0)], 'blue' : [(0.0, 0.5, 0.5)]} prev_pos = 0 median = (np.median(vals) - mindata) / diff for prc in np.linspace(posI, median, cuts / 2, endpoint=False): try: pos = (np.percentile(vals, prc * 100.) - mindata) / diff prc = ((prc - posI) / (median - posI)) + 1. / cuts except ValueError: pos = prc = 0 if prev_pos >= pos: continue cdict['red' ].append([pos, prc, prc]) cdict['green'].append([pos, prc, prc]) cdict['blue' ].append([pos, 1, 1]) prev_pos = pos for prc in np.linspace(median + 1. / cuts, posF, cuts / 2, endpoint=False): try: pos = (np.percentile(vals, prc * 100.) - mindata) / diff prc = ((prc - median) / (posF - median)) except ValueError: pos = prc = 0 if prev_pos >= pos: continue cdict['red' ].append([pos, 1.0, 1.0]) cdict['green'].append([pos, 1 - prc, 1 - prc]) cdict['blue' ].append([pos, 1 - prc, 1 - prc]) prev_pos = pos pos = (np.percentile(vals ,97.) - mindata) / diff cdict['red' ].append([pos, 0.1, 0.1]) cdict['green'].append([pos, 0, 0]) cdict['blue' ].append([pos, 0, 0]) cdict['red' ].append([1.0, 1, 1]) cdict['green'].append([1.0, 1, 1]) cdict['blue' ].append([1.0, 0, 0]) cmap = LinearSegmentedColormap(cmap, cdict) clim = None else: cmap = plt.get_cmap(cmap) cmap.set_bad('darkgrey', 1) ax1.imshow(data, interpolation='none', cmap=cmap, vmin=clim[0] if clim else None, vmax=clim[1] if clim else None) size = len(data) for i in xrange(size): for j in xrange(i, size): if np.isnan(data[i][j]): data[i][j] = 0 data[j][i] = 0 #data[j][i] = data[i][j] evals, evect = eigh(data) sort_perm = evals.argsort() evect = evect[sort_perm] data = [i for d in data for i in d if not np.isnan(i)] gradient = np.linspace(np.nanmin(data), np.nanmax(data), size) gradient = np.vstack((gradient, gradient)) h = ax2.hist(data, color='darkgrey', linewidth=2, bins=20, histtype='step', normed=True) _ = ax2.imshow(gradient, aspect='auto', cmap=cmap, extent=(np.nanmin(data), np.nanmax(data) , 0, max(h[0]))) if genome_seq: for crm in genome_seq: ax1.vlines([cumcs[crm][0]-.5, cumcs[crm][1]-.5], cumcs[crm][0]-.5, cumcs[crm][1]-.5, color='w', linestyle='-', linewidth=1, alpha=1) ax1.hlines([cumcs[crm][1]-.5, cumcs[crm][0]-.5], cumcs[crm][0]-.5, cumcs[crm][1]-.5, color='w', linestyle='-', linewidth=1, alpha=1) ax1.vlines([cumcs[crm][0]-.5, cumcs[crm][1]-.5], cumcs[crm][0]-.5, cumcs[crm][1]-.5, color='k', linestyle='--') ax1.hlines([cumcs[crm][1]-.5, cumcs[crm][0]-.5], cumcs[crm][0]-.5, cumcs[crm][1]-.5, color='k', linestyle='--') if not one: vals = [0] keys = [''] for crm in genome_seq: vals.append(cumcs[crm][0]) keys.append(crm) vals.append(cumcs[crm][1]) ax1.set_yticks(vals) ax1.set_yticklabels('') ax1.set_yticks([float(vals[i]+vals[i+1])/2 for i in xrange(len(vals) - 1)], minor=True) ax1.set_yticklabels(keys, minor=True) for t in ax1.yaxis.get_minor_ticks(): t.tick1On = False t.tick2On = False # totaloridata = ''.join([j + ('' if (i+1)%3 else ',') for i, j in enumerate(str(totaloridata)[::-1])])[::-1].strip(',') # minoridata = ''.join([j + ('' if (i+1)%3 else ',') for i, j in enumerate(str(minoridata)[::-1])])[::-1].strip(',') # maxoridata = ''.join([j + ('' if (i+1)%3 else ',') for i, j in enumerate(str(maxoridata)[::-1])])[::-1].strip(',') plt.figtext(0.05,0.25, ''.join([ (name + '\n') if name else '', 'Number of interactions: %s\n' % str(totaloridata), ('' if np.isnan(cistrans) else ('Percentage of cis interactions: %.0f%%\n' % (cistrans*100))), 'Min interactions: %s\n' % (minoridata), 'Max interactions: %s\n' % (maxoridata)])) ax2.set_xlim((np.nanmin(data), np.nanmax(data))) ax2.set_ylim((0, max(h[0]))) ax1.set_xlim ((-0.5, size - .5)) ax1.set_ylim ((-0.5, size - .5)) ax2.set_xlabel('log interaction count') # we reduce the number of dots displayed.... we just want to see the shape subdata = np.array(list(set([float(int(d*100))/100 for d in data]))) try: normfit = sc_norm.pdf(subdata, np.nanmean(data), np.nanstd(data)) except AttributeError: normfit = sc_norm.pdf(subdata, np.mean(data), np.std(data)) ax2.plot(subdata, normfit, 'w.', markersize=2.5, alpha=.4) ax2.plot(subdata, normfit, 'k.', markersize=1.5, alpha=1) ax2.set_title('skew: %.3f, kurtosis: %.3f' % (skew(data), kurtosis(data))) ax4.vlines(range(size), 0, evect[:,-1], color='k') ax4.hlines(0, 0, size, color='red') ax4.set_ylabel('E1') ax4.set_yticklabels([]) try: ax5.vlines(range(size), 0, evect[:,-2], color='k') except IndexError: pass ax5.hlines(0, 0, size, color='red') ax5.set_ylabel('E2') ax5.set_yticklabels([]) try: ax6.vlines(range(size), 0, evect[:,-3], color='k') except IndexError: pass ax6.hlines(0, 0, size, color='red') ax6.set_ylabel('E3') ax6.set_yticklabels([]) xticklabels = ax4.get_xticklabels() + ax5.get_xticklabels() + ax6.get_xticklabels() plt.setp(xticklabels, visible=False) if savefig: tadbit_savefig(savefig) elif show: plt.show() plt.close('all')
def draw_map( data, genome_seq, cumcs, savefig, show, one=False, clim=None, cmap="jet", decay=False, perc=10, name=None, cistrans=None, decay_resolution=10000, normalized=False, max_diff=None, ): _ = plt.figure(figsize=(15.0, 12.5)) if not max_diff: max_diff = len(data) ax1 = plt.axes([0.34, 0.08, 0.6, 0.7205]) ax2 = plt.axes([0.07, 0.65, 0.21, 0.15]) if decay: ax3 = plt.axes([0.07, 0.42, 0.21, 0.15]) plot_distance_vs_interactions( data, genome_seq=genome_seq, axe=ax3, resolution=decay_resolution, max_diff=max_diff, normalized=normalized ) ax4 = plt.axes([0.34, 0.805, 0.6, 0.04], sharex=ax1) ax5 = plt.axes([0.34, 0.845, 0.6, 0.04], sharex=ax1) ax6 = plt.axes([0.34, 0.885, 0.6, 0.04], sharex=ax1) try: minoridata = np.nanmin(data) maxoridata = np.nanmax(data) except AttributeError: vals = [i for d in data for i in d if not np.isnan(i)] minoridata = np.min(vals) maxoridata = np.max(vals) totaloridata = np.nansum([data[i][j] for i in xrange(len(data)) for j in xrange(i, len(data))]) data = nozero_log(data, np.log2) vals = np.array([i for d in data for i in d]) vals = vals[np.isfinite(vals)] mindata = np.nanmin(vals) maxdata = np.nanmax(vals) diff = maxdata - mindata posI = 0.01 if not clim else (float(clim[0]) / diff) if clim[0] != None else 0.01 posF = 1.0 if not clim else (float(clim[1]) / diff) if clim[1] != None else 1.0 if cmap == "tadbit": cuts = perc cdict = {"red": [(0.0, 0.0, 0.0)], "green": [(0.0, 0.0, 0.0)], "blue": [(0.0, 0.5, 0.5)]} prev_pos = 0 median = (np.median(vals) - mindata) / diff for prc in np.linspace(posI, median, cuts / 2, endpoint=False): try: pos = (np.percentile(vals, prc * 100.0) - mindata) / diff prc = ((prc - posI) / (median - posI)) + 1.0 / cuts except ValueError: pos = prc = 0 if prev_pos >= pos: continue cdict["red"].append([pos, prc, prc]) cdict["green"].append([pos, prc, prc]) cdict["blue"].append([pos, 1, 1]) prev_pos = pos for prc in np.linspace(median + 1.0 / cuts, posF, cuts / 2, endpoint=False): try: pos = (np.percentile(vals, prc * 100.0) - mindata) / diff prc = (prc - median) / (posF - median) except ValueError: pos = prc = 0 if prev_pos >= pos: continue cdict["red"].append([pos, 1.0, 1.0]) cdict["green"].append([pos, 1 - prc, 1 - prc]) cdict["blue"].append([pos, 1 - prc, 1 - prc]) prev_pos = pos pos = (np.percentile(vals, 97.0) - mindata) / diff cdict["red"].append([pos, 0.1, 0.1]) cdict["green"].append([pos, 0, 0]) cdict["blue"].append([pos, 0, 0]) cdict["red"].append([1.0, 1, 1]) cdict["green"].append([1.0, 1, 1]) cdict["blue"].append([1.0, 0, 0]) cmap = LinearSegmentedColormap(cmap, cdict) clim = None else: cmap = plt.get_cmap(cmap) cmap.set_bad("darkgrey", 1) ax1.imshow(data, interpolation="none", cmap=cmap, vmin=clim[0] if clim else None, vmax=clim[1] if clim else None) size = len(data) for i in xrange(size): for j in xrange(i, size): if np.isnan(data[i][j]): data[i][j] = 0 data[j][i] = 0 # data[j][i] = data[i][j] evals, evect = eigh(data) sort_perm = evals.argsort() evect = evect[sort_perm] data = [i for d in data for i in d if not np.isnan(i)] gradient = np.linspace(np.nanmin(data), np.nanmax(data), size) gradient = np.vstack((gradient, gradient)) h = ax2.hist(data, color="darkgrey", linewidth=2, bins=20, histtype="step", normed=True) _ = ax2.imshow(gradient, aspect="auto", cmap=cmap, extent=(np.nanmin(data), np.nanmax(data), 0, max(h[0]))) if genome_seq: for crm in genome_seq: ax1.vlines( [cumcs[crm][0] - 0.5, cumcs[crm][1] - 0.5], cumcs[crm][0] - 0.5, cumcs[crm][1] - 0.5, color="w", linestyle="-", linewidth=1, alpha=1, ) ax1.hlines( [cumcs[crm][1] - 0.5, cumcs[crm][0] - 0.5], cumcs[crm][0] - 0.5, cumcs[crm][1] - 0.5, color="w", linestyle="-", linewidth=1, alpha=1, ) ax1.vlines( [cumcs[crm][0] - 0.5, cumcs[crm][1] - 0.5], cumcs[crm][0] - 0.5, cumcs[crm][1] - 0.5, color="k", linestyle="--", ) ax1.hlines( [cumcs[crm][1] - 0.5, cumcs[crm][0] - 0.5], cumcs[crm][0] - 0.5, cumcs[crm][1] - 0.5, color="k", linestyle="--", ) if not one: vals = [0] keys = [""] for crm in genome_seq: vals.append(cumcs[crm][0]) keys.append(crm) vals.append(cumcs[crm][1]) ax1.set_yticks(vals) ax1.set_yticklabels("") ax1.set_yticks([float(vals[i] + vals[i + 1]) / 2 for i in xrange(len(vals) - 1)], minor=True) ax1.set_yticklabels(keys, minor=True) for t in ax1.yaxis.get_minor_ticks(): t.tick1On = False t.tick2On = False # totaloridata = ''.join([j + ('' if (i+1)%3 else ',') for i, j in enumerate(str(totaloridata)[::-1])])[::-1].strip(',') # minoridata = ''.join([j + ('' if (i+1)%3 else ',') for i, j in enumerate(str(minoridata)[::-1])])[::-1].strip(',') # maxoridata = ''.join([j + ('' if (i+1)%3 else ',') for i, j in enumerate(str(maxoridata)[::-1])])[::-1].strip(',') plt.figtext( 0.05, 0.25, "".join( [ (name + "\n") if name else "", "Number of interactions: %s\n" % str(totaloridata), ("" if np.isnan(cistrans) else ("Percentage of cis interactions: %.0f%%\n" % (cistrans * 100))), "Min interactions: %s\n" % (minoridata), "Max interactions: %s\n" % (maxoridata), ] ), ) ax2.set_xlim((np.nanmin(data), np.nanmax(data))) ax2.set_ylim((0, max(h[0]))) ax1.set_xlim((-0.5, size - 0.5)) ax1.set_ylim((-0.5, size - 0.5)) ax2.set_xlabel("log interaction count") # we reduce the number of dots displayed.... we just want to see the shape subdata = np.array(list(set([float(int(d * 100)) / 100 for d in data]))) try: normfit = sc_norm.pdf(subdata, np.nanmean(data), np.nanstd(data)) except AttributeError: normfit = sc_norm.pdf(subdata, np.mean(data), np.std(data)) ax2.plot(subdata, normfit, "w.", markersize=2.5, alpha=0.4) ax2.plot(subdata, normfit, "k.", markersize=1.5, alpha=1) ax2.set_title("skew: %.3f, kurtosis: %.3f" % (skew(data), kurtosis(data))) ax4.vlines(range(size), 0, evect[:, -1], color="k") ax4.hlines(0, 0, size, color="red") ax4.set_ylabel("E1") ax4.set_yticklabels([]) try: ax5.vlines(range(size), 0, evect[:, -2], color="k") except IndexError: pass ax5.hlines(0, 0, size, color="red") ax5.set_ylabel("E2") ax5.set_yticklabels([]) try: ax6.vlines(range(size), 0, evect[:, -3], color="k") except IndexError: pass ax6.hlines(0, 0, size, color="red") ax6.set_ylabel("E3") ax6.set_yticklabels([]) xticklabels = ax4.get_xticklabels() + ax5.get_xticklabels() + ax6.get_xticklabels() plt.setp(xticklabels, visible=False) if savefig: tadbit_savefig(savefig) elif show: plt.show() plt.close("all")