def geneate_biases_ICE(chromosome, matrix, bins, output):
    """Write the per-bin ICE bias vector to ``<output>_bias.txt.gz``.

    Each output line is ``<chromosome>\\t<bin midpoint>\\t<bias>``.

    Args:
        chromosome: Chromosome identifier; converted with ``str()`` for output.
        matrix: Contact matrix handed to ``normalization.ICE_normalization``.
        bins: 2-D array; columns 1 and 2 are averaged (and truncated to int)
            to obtain each bin's midpoint -- presumably the bin start/end
            coordinates, confirm against the caller.
        output: Path prefix; ``'_bias.txt.gz'`` is appended to it.

    Notes:
        The gzip stream is written directly, so no intermediate
        ``<output>_bias.txt`` file is created or removed.  If ``bins`` and the
        bias vector differ in length, ``zip`` stops at the shorter one.
    """
    chro_name = str(chromosome)
    mid_points = ((bins[:, 1] + bins[:, 2]) / 2).astype(int)

    # Only the bias vector is needed; the normalized matrix is discarded.
    _, bias = normalization.ICE_normalization(matrix, output_bias=True)
    bias = bias.flatten().astype(float)

    # 'wt' opens the gzip stream in text mode; the context manager closes it.
    with gzip.open(output + '_bias.txt.gz', 'wt') as f_out:
        for mp, bs in zip(mid_points, bias):
            f_out.write('{}\t{}\t{}\n'.format(chro_name, mp, bs))
def plot_demo(source_dir, chromosome, model_name, data_type, resolution, start, end, destination_dir):
    """Plot a log1p-scaled Hi-C heatmap for one region and save it as PDF and JPG.

    The matrix is read from
    ``<source_dir>/sample_<model_name>_chr<chromosome>.cool`` with
    ``balance=True``, passed through ``normalization.ICE_normalization`` and
    ``filter_diag_boundary``, and drawn with a ``BoundaryNorm`` colour scale.

    Args:
        source_dir: Directory containing the ``.cool`` file.
        chromosome: Chromosome label without the ``chr`` prefix.
        model_name: Model identifier; the part before the first ``_`` selects
            the display name (``ours``, ``deephic``, ``hicsr``, ``low``,
            ``high``).
        data_type: Text appended to the title, except when the model prefix
            is ``high`` and ``data_type`` contains ``'down'``.
        resolution: Unused; kept for interface compatibility.
        start: Region start; cast to ``int`` and clamped to be non-negative.
        end: Region end; if it exceeds the chromosome size, the window is
            shifted left so that it ends at the chromosome end.
        destination_dir: Directory for the figures (created if missing).

    Raises:
        KeyError: If the model prefix is not one of the known display names.
    """
    chrom = 'chr{}'.format(chromosome)
    cool_file = os.path.join(source_dir, 'sample_{}_chr{}.cool'.format(model_name, chromosome))
    hic = cooler.Cooler(cool_file)

    start = max(0, int(start))
    chrom_size = hic.chromsizes[chrom]
    if end > chrom_size:
        # Keep the window length but slide it back inside the chromosome;
        # never let the start fall below zero for over-long windows.
        length = end - start
        end = chrom_size
        start = max(0, end - length)
    region = (chrom, start, end)

    legend = {'ours': 'EnHiC', 'deephic': 'Deephic', 'hicsr': 'HiCSR', 'low': 'LR', 'high': 'HR'}
    name = model_name.split('_')[0]
    # Resolve the display name before any figure exists so an unknown model
    # fails without leaving an open figure behind.
    label = legend[name]

    hic_mat = hic.matrix(balance=True).fetch(region)
    hic_mat = normalization.ICE_normalization(hic_mat)
    hic_mat = filter_diag_boundary(hic_mat, diag_k=1, boundary_k=200)
    Z = np.log1p(hic_mat)

    # Fine colour steps below 7, coarser steps from 7 up to 12.
    bounds = np.append(np.arange(0, 7, 0.06), np.arange(7, 12, 0.3))
    norm = matplotlib.colors.BoundaryNorm(boundaries=bounds, ncolors=256)

    fig, ax0 = plt.subplots()
    try:
        im = ax0.imshow(Z, cmap=plt.get_cmap('RdBu_r'), norm=norm)
        fig.colorbar(im, ax=ax0, ticks=np.arange(0, 8))

        if name == 'high' and 'down' in data_type:
            ax0.set_title('{}'.format(label), fontsize=24)
        else:
            ax0.set_title('{}, {}'.format(label, data_type), fontsize=24)
        ax0.set_xlim(-1, hic_mat.shape[0])
        ax0.set_ylim(-1, hic_mat.shape[1])
        fig.tight_layout()

        output = destination_dir
        os.makedirs(output, exist_ok=True)
        # Save through the figure object rather than pyplot's current figure.
        fig.savefig(os.path.join(output, 'demo_{}.pdf'.format(label)), format='pdf')
        fig.savefig(os.path.join(output, 'demo_{}.jpg'.format(label)), format='jpg')
    finally:
        # pyplot keeps every figure alive until closed; release it so that
        # repeated calls do not accumulate open figures.
        plt.close(fig)
def plot_significant_interactions(source_dir, chromosome, model_name, resolution, low_dis, up_dis, start, end):
    """Overlay FitHiC significant interactions on a Hi-C heatmap and save it.

    Reads ``<source_dir>/<model_name>_chr<chromosome>.cool`` and the FitHiC
    result ``FitHiC.spline_pass1.res10000.significances.txt.gz`` located under
    ``<source_dir>/output_<start>_<end>/<model_name>_chr<chromosome>_<start>_<end>/``.
    Rows of ``extract_si``'s result whose third column is below 0.05 are
    drawn as green dots on the log1p-scaled matrix.  Figures are written as
    PDF and JPG to ``<source_dir>/figure/<start>_<end>/``.

    The function returns silently, without plotting, when the FitHiC file is
    missing or empty.

    Args:
        source_dir: Directory with the ``.cool`` file and FitHiC output.
        chromosome: Chromosome label without the ``chr`` prefix.
        model_name: Model identifier; the part before the first ``_`` selects
            the display name used in the title.
        resolution: Bin size used to convert genomic positions to matrix
            indices.
        low_dis: Unused; kept for interface compatibility.
        up_dis: Unused; kept for interface compatibility.
        start: Region start; cast to ``int`` and clamped to be non-negative.
        end: Region end; if it exceeds the chromosome size, the window is
            shifted left so that it ends at the chromosome end.

    Raises:
        KeyError: If the model prefix is not one of the known display names.
    """
    chrom = 'chr{}'.format(chromosome)
    cool_file = os.path.join(source_dir, '{}_chr{}.cool'.format(model_name, chromosome))
    hic = cooler.Cooler(cool_file)

    start = max(0, int(start))
    chrom_size = hic.chromsizes[chrom]
    if end > chrom_size:
        # Keep the window length but slide it back inside the chromosome;
        # never let the start fall below zero for over-long windows.
        length = end - start
        end = chrom_size
        start = max(0, end - length)
    region = (chrom, start, end)

    prefix = '{}_chr{}_{}_{}'.format(model_name, chromosome, start, end)
    # NOTE(review): the file name hard-codes 'res10000' regardless of the
    # `resolution` argument -- confirm this matches how FitHiC was run.
    model_path = os.path.join(
        source_dir, 'output_{}_{}'.format(start, end), prefix,
        'FitHiC.spline_pass1.res10000.significances.txt.gz')
    if not os.path.isfile(model_path):
        return
    model_data = pd.read_csv(model_path, compression='gzip', header=0, sep='\t')
    if model_data.empty:
        return

    model_si = extract_si(model_data)
    q_idx = np.flatnonzero(model_si[:, 2] < 0.05)
    # Convert genomic coordinates to bin indices relative to the region start.
    si_x = np.floor((model_si[q_idx, 0].flatten() - start) / resolution)
    si_y = np.floor((model_si[q_idx, 1].flatten() - start) / resolution)

    legend = {
        'ours': 'EnHiC',
        'deephic': 'Deephic',
        'hicsr': 'HiCSR',
        'low': 'LR',
        'high': 'HR',
    }
    name = model_name.split('_')[0]
    # Resolve the display name before any figure exists so an unknown model
    # fails without leaving an open figure behind.
    label = legend[name]

    # The matrix is only loaded once it is known that there is something to
    # plot, so the early returns above skip the expensive fetch/normalization.
    hic_mat = hic.matrix(balance=True).fetch(region)
    hic_mat = normalization.ICE_normalization(hic_mat)
    hic_mat = filter_diag_boundary(hic_mat, diag_k=1, boundary_k=200)
    Z = np.log1p(hic_mat)

    fig, ax0 = plt.subplots()
    try:
        im = ax0.imshow(Z, cmap=plt.get_cmap('coolwarm'), vmin=0, vmax=8)
        fig.colorbar(im, ax=ax0)
        ax0.scatter(si_x.flatten(), si_y.flatten(), color='#00ff00', s=6, edgecolors='none')

        ax0.set_title('{} log1p Scale'.format(label))
        ax0.set_xlim(-1, hic_mat.shape[0])
        ax0.set_ylim(-1, hic_mat.shape[1])
        fig.tight_layout()

        output = os.path.join(source_dir, 'figure', '{}_{}'.format(start, end))
        os.makedirs(output, exist_ok=True)
        # Save through the figure object rather than pyplot's current figure.
        fig.savefig(os.path.join(
            output, '{}_chr{}_{}_{}.pdf'.format(model_name, chromosome, start, end)), format='pdf')
        fig.savefig(os.path.join(
            output, '{}_chr{}_{}_{}.jpg'.format(model_name, chromosome, start, end)), format='jpg')
    finally:
        # pyplot keeps every figure alive until closed; release it so that
        # repeated calls do not accumulate open figures.
        plt.close(fig)
from matplotlib import colors from iced import datasets from iced import filter from iced import normalization """ Normalizing a contact count matrix. """ # Loading a sample dataset counts, lengths = datasets.load_sample_yeast() # Filtering and normalizing contact count data normed = filter.filter_low_counts(counts, lengths=lengths, percentage=0.04) normed = normalization.ICE_normalization(normed) # Plotting the results using matplotlib chromosomes = ["I", "II", "III", "IV", "V", "VI"] fig, axes = plt.subplots(ncols=2, figsize=(12, 4)) axes[0].imshow(counts, cmap="Blues", norm=colors.SymLogNorm(1), origin="bottom", extent=(0, len(counts), 0, len(counts))) [axes[0].axhline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] [axes[0].axvline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] axes[0].set_title("Raw contact counts") m = axes[1].imshow(normed, cmap="Blues", norm=colors.SymLogNorm(1),
from iced import filter, normalization
import numpy as np

# Directory holding the space-delimited 50 kb contact matrices; the
# normalized matrices are written back into the same directory.
filePtr = './GSM1173492_Th1_ensemble/50kb/'

# Chromosomes 1 through 10: load, apply filter_low_counts (percentage=0.04),
# ICE-normalize, and save as comma-delimited text.
for i in range(1, 11):
    raw_path = '{}chr{}_50kb.txt'.format(filePtr, i)
    # The double underscore before '50kb' reproduces the existing output name.
    out_path = '{}Iced_chr{}__50kb.txt'.format(filePtr, i)

    counts = np.genfromtxt(raw_path, delimiter=' ')
    counts = filter.filter_low_counts(counts, percentage=0.04)
    normed = normalization.ICE_normalization(counts)
    np.savetxt(out_path, normed, delimiter=',')
# # The normalization is done in three step: # # 1. Normalize the data using LOIC, to remove GC, mappability, and other # biases # 2. Estimate the block biases due to copy number. # 3. Remove the block biases from the LOIC-normalized contact counts from iced import datasets from iced import normalization import matplotlib.pyplot as plt from matplotlib import colors counts, lengths, cnv = datasets.load_sample_cancer() loic_normed = normalization.ICE_normalization(counts, counts_profile=cnv) block_biases = normalization.estimate_block_biases(counts, lengths, cnv) caic_normed = loic_normed / block_biases ############################################################################### # Visualizing the results using Matplotlib # ---------------------------------------- # # The following code visualizes the raw original data, the estimated block # biases, and the normalized matrix using the CAIC method. chromosomes = ["I", "II", "III", "IV", "V", "VI"] fig, axes = plt.subplots(ncols=3, figsize=(14, 3)) axes[0].imshow(counts, cmap="RdBu_r",