Пример #1
0
def geneate_biases_ICE(chromosome, matrix, bins, output):
    """Write the ICE bias of every bin to ``<output>_bias.txt.gz``.

    The bias vector returned by ``normalization.ICE_normalization`` is paired
    with the mid-point of each bin and written as gzip-compressed,
    tab-separated text with one line per bin: chromosome name, mid-point,
    bias value.

    Args:
        chromosome: Chromosome label; converted with ``str`` for the first
            column of every line.
        matrix: Contact matrix handed to ``normalization.ICE_normalization``.
        bins: 2-D array whose columns 1 and 2 are averaged to obtain the bin
            mid-point (presumably bin start/end coordinates -- confirm
            against the caller).
        output: Path prefix; ``_bias.txt.gz`` is appended to it.
    """
    chro_name = str(chromosome)
    mid_points = ((bins[:, 1] + bins[:, 2]) / 2).astype(int)

    # Only the bias vector is used here; the normalized matrix is discarded.
    _, bias = normalization.ICE_normalization(matrix, output_bias=True)
    bias = bias.flatten().astype(float)

    # Stream the lines directly into the gzip file. This replaces the earlier
    # write-plain-text / re-read / compress / delete round trip and the
    # explicit close() calls, which were redundant inside ``with`` blocks.
    with gzip.open(output + '_bias.txt.gz', 'wt') as f_out:
        f_out.writelines(
            '{}\t{}\t{}\n'.format(chro_name, mp, bs)
            for mp, bs in zip(mid_points, bias))
Пример #2
0
def plot_demo(source_dir, chromosome, model_name, data_type, resolution, start, end, destination_dir):
    """Plot a log1p-scaled heat map of one Hi-C region and save it as PDF and JPG.

    The matrix is read from ``sample_<model_name>_chr<chromosome>.cool`` in
    ``source_dir``, fetched with ``balance=True``, passed through
    ``normalization.ICE_normalization`` and ``filter_diag_boundary``, and then
    drawn with a ``BoundaryNorm`` colour scale.

    Args:
        source_dir: Directory containing the ``.cool`` file.
        chromosome: Chromosome identifier without the ``chr`` prefix.
        model_name: Model identifier; the part before the first ``_`` selects
            the display label (``ours``, ``deephic``, ``hicsr``, ``low`` or
            ``high``).
        data_type: Text appended to the title; also checked for ``'down'``.
        resolution: Unused here; kept so existing callers keep working.
        start: Region start; converted with ``int`` and clamped to >= 0.
        end: Region end; converted with ``int``. If it lies beyond the
            chromosome size the window is shifted back so that it ends at the
            chromosome end.
        destination_dir: Directory (created if needed) that receives
            ``demo_<label>.pdf`` and ``demo_<label>.jpg``.

    Raises:
        KeyError: If the prefix of ``model_name`` is not one of the known
            labels.
    """
    chrom = 'chr{}'.format(chromosome)
    cool_file = os.path.join(source_dir, 'sample_{}_chr{}.cool'.format(model_name, chromosome))
    hic = cooler.Cooler(cool_file)
    chrom_size = hic.chromsizes[chrom]

    start = max(0, int(start))
    # Normalize ``end`` the same way as ``start`` so string input (e.g. from a
    # command line) can be compared with the chromosome size.
    end = int(end)
    if end > chrom_size:
        length = end - start
        end = chrom_size
        # A window longer than the chromosome would otherwise produce a
        # negative start coordinate.
        start = max(0, end - length)

    region = (chrom, start, end)
    hic_mat = hic.matrix(balance=True).fetch(region)
    hic_mat = normalization.ICE_normalization(hic_mat)

    fig, ax0 = plt.subplots()
    cmap = plt.get_cmap('RdBu_r')
    hic_mat = filter_diag_boundary(hic_mat, diag_k=1, boundary_k=200)
    Z = np.log1p(hic_mat)

    # Fine colour steps (0.06) below 7 and coarse steps (0.3) from 7 to 12.
    bounds = np.append(np.arange(0, 7, 0.06), np.arange(7, 12, 0.3))
    norm = matplotlib.colors.BoundaryNorm(boundaries=bounds, ncolors=256)
    im = ax0.imshow(Z, cmap=cmap, norm=norm)
    fig.colorbar(im, ax=ax0, ticks=np.arange(0, 8))

    legend = {'ours': 'EnHiC', 'deephic': 'Deephic', 'hicsr': 'HiCSR', 'low': 'LR', 'high': 'HR'}
    name = model_name.split('_')[0]
    label = legend[name]
    if name == 'high' and 'down' in data_type:
        ax0.set_title('{}'.format(label), fontsize=24)
    else:
        ax0.set_title('{}, {}'.format(label, data_type), fontsize=24)
    ax0.set_xlim(-1, hic_mat.shape[0])
    # NOTE(review): ascending y-limits place row 0 at the bottom of the plot;
    # confirm this orientation is intended.
    ax0.set_ylim(-1, hic_mat.shape[1])
    fig.tight_layout()

    output = destination_dir
    os.makedirs(output, exist_ok=True)
    # Save through the figure object rather than pyplot's implicit current
    # figure, so the right figure is written even if others are open.
    fig.savefig(os.path.join(output, 'demo_{}.pdf'.format(label)), format='pdf')
    fig.savefig(os.path.join(output, 'demo_{}.jpg'.format(label)), format='jpg')
Пример #3
0
def plot_significant_interactions(source_dir, chromosome, model_name,
                                  resolution, low_dis, up_dis, start, end):
    """Overlay significant interactions on a Hi-C heat map and save the figure.

    Reads ``<model_name>_chr<chromosome>.cool`` from ``source_dir`` and the
    matching ``FitHiC.spline_pass1.res10000.significances.txt.gz`` table under
    ``output_<start>_<end>/<model_name>_chr<chromosome>_<start>_<end>/``.
    Rows of the array returned by ``extract_si`` whose third column is below
    0.05 are scattered on top of the log1p-scaled matrix. The figure is saved
    as PDF and JPG under ``<source_dir>/figure/<start>_<end>/``.

    The function returns without plotting when the significance file does not
    exist or is empty.

    Args:
        source_dir: Directory holding the ``.cool`` file and FitHiC output.
        chromosome: Chromosome identifier without the ``chr`` prefix.
        model_name: Model identifier; the part before the first ``_`` selects
            the display label used in the title.
        resolution: Bin size used to convert coordinates to matrix indices.
        low_dis: Unused here; kept so existing callers keep working.
        up_dis: Unused here; kept so existing callers keep working.
        start: Region start; converted with ``int`` and clamped to >= 0.
        end: Region end. If it lies beyond the chromosome size the window is
            shifted back so that it ends at the chromosome end.

    Raises:
        KeyError: If the prefix of ``model_name`` is not one of the known
            labels.
    """
    chrom = 'chr{}'.format(chromosome)
    cool_file = os.path.join(source_dir,
                             '{}_chr{}.cool'.format(model_name, chromosome))
    hic = cooler.Cooler(cool_file)
    chrom_size = hic.chromsizes[chrom]

    start = max(0, int(start))
    if end > chrom_size:
        length = end - start
        end = chrom_size
        # A window longer than the chromosome would otherwise produce a
        # negative start coordinate.
        start = max(0, end - length)

    # NOTE(review): the file name hard-codes ``res10000`` regardless of the
    # ``resolution`` argument -- confirm this is intended.
    prefix = '{}_chr{}_{}_{}'.format(model_name, chromosome, start, end)
    model_path = os.path.join(
        source_dir, 'output_{}_{}'.format(start, end), prefix,
        'FitHiC.spline_pass1.res10000.significances.txt.gz')
    if not os.path.isfile(model_path):
        return
    model_data = pd.read_csv(model_path,
                             compression='gzip',
                             header=0,
                             sep='\t')
    if model_data.empty:
        return
    model_si = extract_si(model_data)

    # Fetch and normalize the matrix only once we know there is something to
    # plot; previously this work was done even when returning early.
    region = (chrom, start, end)
    hic_mat = hic.matrix(balance=True).fetch(region)
    hic_mat = normalization.ICE_normalization(hic_mat)

    # Keep rows whose third column is below 0.05 (presumably the q-value --
    # confirm against extract_si) and convert the first two columns from
    # coordinates to indices relative to the region start.
    q_idx = np.array(np.where(model_si[:, 2] < 0.05)).flatten()
    si_x = np.floor((model_si[q_idx, 0].flatten() - start) / resolution)
    si_y = np.floor((model_si[q_idx, 1].flatten() - start) / resolution)

    fig, ax0 = plt.subplots()
    cmap = plt.get_cmap('coolwarm')
    hic_mat = filter_diag_boundary(hic_mat, diag_k=1, boundary_k=200)
    Z = np.log1p(hic_mat)
    im = ax0.imshow(Z, cmap=cmap, vmin=0, vmax=8)
    fig.colorbar(im, ax=ax0)

    ax0.scatter(si_x.flatten(),
                si_y.flatten(),
                color='#00ff00',
                s=6,
                edgecolors='none')
    legend = {
        'ours': 'EnHiC',
        'deephic': 'Deephic',
        'hicsr': 'HiCSR',
        'low': 'LR',
        'high': 'HR',
    }
    name = model_name.split('_')[0]
    ax0.set_title('{} log1p Scale'.format(legend[name]))
    ax0.set_xlim(-1, hic_mat.shape[0])
    ax0.set_ylim(-1, hic_mat.shape[1])
    fig.tight_layout()

    output = os.path.join(source_dir, 'figure', '{}_{}'.format(start, end))
    os.makedirs(output, exist_ok=True)
    figure_stem = '{}_chr{}_{}_{}'.format(model_name, chromosome, start, end)
    # Save through the figure object rather than pyplot's implicit current
    # figure, so the right figure is written even if others are open.
    fig.savefig(os.path.join(output, figure_stem + '.pdf'), format='pdf')
    fig.savefig(os.path.join(output, figure_stem + '.jpg'), format='jpg')
Пример #4
0
from matplotlib import colors

from iced import datasets
from iced import filter
from iced import normalization

"""
Normalizing a contact count matrix.
"""

# Loading a sample dataset
counts, lengths = datasets.load_sample_yeast()

# Filtering and normalizing contact count data
normed = filter.filter_low_counts(counts, lengths=lengths, percentage=0.04)
normed = normalization.ICE_normalization(normed)

# Plotting the results using matplotlib
chromosomes = ["I", "II", "III", "IV", "V", "VI"]

fig, axes = plt.subplots(ncols=2, figsize=(12, 4))

# "lower" is the documented spelling for placing row 0 at the bottom;
# "bottom" is not an accepted value for ``origin``.
axes[0].imshow(counts, cmap="Blues", norm=colors.SymLogNorm(1),
               origin="lower",
               extent=(0, len(counts), 0, len(counts)))

# Draw a horizontal and a vertical line at every cumulative chromosome
# length. Plain loops are used because the calls are made for their side
# effects only.
boundaries = lengths.cumsum()
for boundary in boundaries:
    axes[0].axhline(boundary, linewidth=1, color="#000000")
for boundary in boundaries:
    axes[0].axvline(boundary, linewidth=1, color="#000000")
axes[0].set_title("Raw contact counts")

m = axes[1].imshow(normed, cmap="Blues", norm=colors.SymLogNorm(1),
from iced import filter, normalization

import numpy as np

# Directory prefix (with trailing slash) for both the input and output files.
filePtr = './GSM1173492_Th1_ensemble/50kb/'

# Process chromosomes 1 through 10: load the space-delimited matrix, filter
# low counts, ICE-normalize, and write the result as comma-delimited text.
for i in range(1, 11):
    source_path = '{}chr{}_50kb.txt'.format(filePtr, i)
    # NOTE(review): the output name contains a double underscore
    # ("Iced_chr<i>__50kb.txt"); kept as-is -- confirm whether it is intended.
    target_path = '{}Iced_chr{}__50kb.txt'.format(filePtr, i)

    counts = filter.filter_low_counts(
        np.genfromtxt(source_path, delimiter=' '),
        percentage=0.04,
    )
    normed = normalization.ICE_normalization(counts)

    np.savetxt(target_path, normed, delimiter=',')
Пример #6
0
#
# The normalization is done in three steps:
#
#   1. Normalize the data using LOIC, to remove GC, mappability, and other
#      biases
#   2. Estimate the block biases due to copy number.
#   3. Remove the block biases from the LOIC-normalized contact counts

from iced import datasets
from iced import normalization
import matplotlib.pyplot as plt
from matplotlib import colors

# Load the sample cancer dataset: contact counts, chromosome lengths and the
# copy-number profile.
counts, lengths, cnv = datasets.load_sample_cancer()

# Step 1: normalize the counts, passing the copy-number profile.
loic_normed = normalization.ICE_normalization(counts, counts_profile=cnv)
# Step 2: estimate the block biases from the raw counts.
block_biases = normalization.estimate_block_biases(counts, lengths, cnv)
# Step 3: divide the block biases out of the normalized counts.
caic_normed = loic_normed / block_biases

###############################################################################
# Visualizing the results using Matplotlib
# ----------------------------------------
#
# The following code visualizes the raw original data, the estimated block
# biases, and the normalized matrix using the CAIC method.
chromosomes = ["I", "II", "III", "IV", "V", "VI"]

# One row of three panels.
fig, axes = plt.subplots(ncols=3, figsize=(14, 3))

axes[0].imshow(counts,
               cmap="RdBu_r",