# Alias for the genome object that is passed to get_chromosomes() below.
# NOTE(review): genome_db_chrmLevel is not defined in this excerpt; it must be
# created earlier in the full script.
genome_db_contig = genome_db_chrmLevel

# Earlier 100k inputs, kept commented out for reference.
#hm_file = "/mnt/storage/home/vsfishman/HiC/tutorial_Fishman/chick/mapped-GalGal5filtered/GalGal5filtered/ChEF-all-HindIII-100k.hm.IC"
#second_hm_file = "/mnt/storage/home/vsfishman/HiC/tutorial_Fishman/chick/mapped-GalGal5filtered/GalGal5filtered/Blood-all-HindIII-100k.hm.IC"

########################WRITE YOUR HEATMAP HERE########################
# First dataset (ChEF, 40k): heatmap plus two domain annotation files.
# NOTE(review): the *_Arm paths are relative, so they resolve against the
# current working directory, unlike the absolute heatmap and *_Dix paths.
hm_file = "/mnt/storage/home/vsfishman/HiC/tutorial_Fishman/chick/mapped-GalGal5filtered/GalGal5filteredChrmLevel/ChEF-all-HindIII-40k.hm.IC"
domains_files_Arm = "mapped-GalGal5filtered/GalGal5filteredChrmLevel/ChEF-all-HindIII-40k.hm.gzipped_matrix/ChEF-all-HindIII-40k.hm.gzipped_matrix.jucebox_domains.annotation"
domains_files_Dix = "/mnt/storage/home/vsfishman/HiC/data/chick/DixonDomainsChEF_all_HindIII_40k.hm.IC_domains_40KB/DixonDomainsChEF_all_HindIII_40k.hm.IC_domains_40KB.jucebox_domains.annotation"

# Second dataset (Blood, 40k): same three kinds of files as above.
second_hm_file = "/mnt/storage/home/vsfishman/HiC/tutorial_Fishman/chick/mapped-GalGal5filtered/GalGal5filteredChrmLevel/Blood-all-HindIII-40k.hm.IC"
second_domains_files_Arm = "mapped-GalGal5filtered/GalGal5filteredChrmLevel/Blood-all-HindIII-40k.hm.gzipped_matrix/Blood-all-HindIII-40k.hm.gzipped_matrix.jucebox_domains.annotation"
second_domains_files_Dix = "/mnt/storage/home/vsfishman/HiC/data/chick/DixonDomainsBlood_all_HindIII_40k.hm.IC_domains_40KB/DixonDomainsBlood_all_HindIII_40k.hm.IC_domains_40KB.jucebox_domains.annotation"


# The resolution is parsed from the first file name; the second file must
# yield the same value because that single value is used to read both heatmaps.
resolution = extractResolutionFromFileName(hm_file)
assert resolution == extractResolutionFromFileName(second_hm_file)

data_dict = {} # arrays of the first heatmap, keyed by position in get_chromosomes() output
data_dict_second_hmap = {} # same structure for the second heatmap
				
# Keys are the 0-based enumerate() indices of whatever get_chromosomes() yields
# (presumably one array per chromosome, in genome order - confirm against
# getIntraChrHeatmaps.get_chromosomes).
for chrm,array in enumerate(get_chromosomes(hm_file,genome_db_contig,resolution)):
	data_dict[chrm] = array
for chrm,array in enumerate(get_chromosomes(second_hm_file,genome_db_contig,resolution)):
	data_dict_second_hmap[chrm] = array

#chrm,nt_start,nt_end,title

#######################################ADD REGIONS TO PLOT HERE
###EXAMPLE:
###points = [
sys.path.append("/mnt/storage/home/vsfishman/HiC/tutorial_Fishman/mESC")
from getIntraChrHeatmaps import get_chromosomes, extractResolutionFromFileName

# Genome object built from the GalGal5 assembled-chromosome FASTA directory.
# NOTE(review): `genome` (and `binnedData`, `sys` used nearby) are not imported
# in this excerpt; they must come from imports earlier in the full script.
genome_db_chrmLevel = genome.Genome(
    "/mnt/storage/home/vsfishman/HiC/fasta/GalGal5/GCF_000002315.4_Gallus_gallus-5.0_assembly_structure/Primary_Assembly/assembled_chromosomes/FASTA/",
    readChrms=[],
    chrmFileTemplate="%s.fna")

# Input heatmap (100k); its file name is also the source of the resolution below.
hm_file = "/mnt/storage/home/vsfishman/HiC/tutorial_Fishman/chick/mapped-GalGal5filtered/GalGal5filteredChrmLevel/ChEF-all-HindIII-100k.hm"

# Output path derived from the input path (not used in the visible part of the script).
f_out_path = hm_file + '.eig'

NumEigenvectors = 1  # number of eigenvectors to compute (not used in the visible part)

# Read resolution from one of the datasets
resolution = extractResolutionFromFileName(hm_file)

# Define the binnedData object, load data
BD = binnedData(resolution, genome_db_chrmLevel)
BD.simpleLoad(hm_file, 'heatmap')  # stored under the dataset name 'heatmap'

BD.removeDiagonal()

# Remove bins with less than half of a bin sequenced
BD.removeBySequencedCount(0.5)

# We'll do iterative correction and Eigenvector expansion on trans data only!
# We want to remove cis, because later we want to remove poor regions in trans
BD.removeCis()

# Truncate top 0.05% of interchromosomal counts (possibly, PCR blowouts)
# NOTE(review): the statement this comment describes is missing - the excerpt
# ends here.
# Example #3
# 0
def filter_bychr_heatmap(hm_file):
    """Filter and iteratively correct a heatmap, saving a PNG after each stage.

    The heatmap is loaded into a ``hiclib`` ``binnedData`` object built from the
    module-level ``genome_db``. Three snapshots of the top-left corner (at most
    10000 x 10000 bins, log-scaled) are written next to the input file:

    * ``hm_file + "stage1.png"`` - raw data as loaded;
    * ``hm_file + "stage2.png"`` - after diagonal / sequenced-count /
      poor-region filtering and trans truncation;
    * ``hm_file + "stage3.png"`` - after iterative correction.

    The corrected heatmap is exported to ``hm_file + ".IC.hdf5"``.

    Note that the suffixes are appended directly to ``hm_file`` with no
    separator. The function relies on ``np``, ``genome_db`` and
    ``extractResolutionFromFileName`` being available at module level.

    Args:
        hm_file: Path of the heatmap file; the bin resolution is parsed from
            this name by ``extractResolutionFromFileName``.

    Raises:
        ValueError: If no resolution can be extracted from ``hm_file``.
    """
    resolution = extractResolutionFromFileName(hm_file)
    if resolution is None:
        # A bare ``raise`` has no active exception to re-raise here and only
        # produced an unrelated error; report the actual problem instead.
        raise ValueError(
            "cannot extract resolution from heatmap file name: %r" % (hm_file,))
    from hiclib import binnedData

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print("creating an object")
    hmap = binnedData.binnedData(resolution, genome_db)

    print("loading data")
    hmap.simpleLoad(hm_file, "heatmap")

    print("saving pict of heatmap")
    import matplotlib
    # Select the non-interactive backend before pyplot is imported so the
    # function can run without a display.
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from mirnylib import plotting

    # Only the top-left corner is plotted to keep the pictures manageable.
    maxlen = min(10000, len(hmap.dataDict["heatmap"]))

    def _save_stage(suffix, dpi):
        """Plot the log of the current heatmap corner to ``hm_file + suffix``."""
        # Re-read the matrix on every call: the filters below modify it.
        corner = hmap.dataDict["heatmap"][0:maxlen, 0:maxlen]
        figure_path = hm_file + suffix
        print("saving  " + figure_path)
        plotting.plot_matrix(np.log(corner))
        plt.subplots_adjust(bottom=0.15)
        # savefig opens and closes the target itself; the extra file handle the
        # original opened was never written to and leaked on errors.
        plt.savefig(figure_path, dpi=dpi)
        plt.clf()

    _save_stage("stage1.png", 600)

    # Remove the contacts between loci located within the same bin +/- 1 bin.
    hmap.removeDiagonal(m=1)

    # Omit bins poorly covered by sequenced bases (i.e. bases present in the
    # genome); called with the library default threshold
    # (0.5 according to the original comment - confirm in hiclib).
    hmap.removeBySequencedCount()

    # Remove the 0.5% of bins with the lowest number of records (i.e. non-zero
    # entries in the matrix). This also catches bins that have almost no
    # contacts but one PCR blowout: many reads, all with one or few other bins.
    hmap.removePoorRegions(cutoff=0.5, coverage=True)

    hmap.truncTrans()  # remove PCR blowouts from trans data

    _save_stage("stage2.png", 200)

    hmap.iterativeCorrectWithoutSS(force=True)  # do iterative correction

    _save_stage("stage3.png", 600)

    # Save the iteratively corrected heatmap.
    # NOTE(review): meaning of the third positional argument (False) is defined
    # by binnedData.export - confirm against hiclib.
    hmap.export("heatmap", hm_file + ".IC.hdf5", False)
# Example #4
# 0
from mirnylib import genome
from mirnylib import h5dict
from mirnylib import plotting
from hiclib import binnedData
from hiclib import fragmentHiC
import math

# Genome object built from the GalGal5 assembled-chromosome FASTA directory.
genome_db = genome.Genome(
    "/mnt/storage/home/vsfishman/HiC/fasta/GalGal5/GCF_000002315.4_Gallus_gallus-5.0_assembly_structure/Primary_Assembly/assembled_chromosomes/FASTA/",
    readChrms=[],
    chrmFileTemplate="%s.fna")

# Heatmap path comes from the first command-line argument.
# NOTE(review): `sys`, `extractResolutionFromFileName` and `get_chromosomes`
# are not imported in this excerpt; they must be imported elsewhere in the file.
hm_file = sys.argv[1]
figure_path = "/mnt/storage/home/vsfishman/HiC/pics/"

# Resolution parsed from the heatmap file name; also used below as the bin size.
domain_res = extractResolutionFromFileName(hm_file)
all_chrms = get_chromosomes(hm_file, genome_db, domain_res)

chrms = range(genome_db.chrmCount)  # number of chrms in genome
st = genome_db.chrmStartsBinCont  # array of chrm start positions (in bins)
end = genome_db.chrmEndsBinCont  # array of chrm end positions (in bins). The value end[-1] is not in the chromosome (it is the 1st bin of the next chrm)

def plot_one_chr_fragment(matrix, figure_path, chr_name, ch_start, ch_end):
    """Start plotting a fragment of one chromosome.

    NOTE(review): this excerpt is cut off after the matrix lookup, so the
    actual plotting code and any return value are not visible here.

    Args:
        matrix: Container indexed by chromosome index (``matrix[i]``).
        figure_path: Not used in the visible part of the body.
        chr_name: Chromosome label, looked up in ``genome_db.label2idx``.
        ch_start: Fragment start; divided by the module-level ``domain_res``
            (presumably a nucleotide coordinate turned into a bin index -
            confirm against callers).
        ch_end: Fragment end; converted the same way as ``ch_start``.
    """
    print "Plotting picture"
    # With Python 2 integer operands `/` floors, giving whole bin numbers
    # (assumes the coordinates and domain_res are ints - TODO confirm).
    ch_start = ch_start / domain_res
    ch_end = ch_end / domain_res
    # domain_st = domain_st / domain_res
    # domain_end = domain_end / domain_res
    i = genome_db.label2idx[chr_name]  # chromosome label -> index
    q2_0 = matrix[i]  # entry of `matrix` for the requested chromosome
# Example #5
# 0
def filter_hires_heatmap(mode="cis", hm_file=""):
    """Filter and iteratively correct a high-resolution heatmap, with snapshots.

    The heatmap is loaded into a ``hiclib`` ``HiResHiC`` object built from the
    module-level ``genome_db``. After each stage the top-left corner (at most
    10000 x 10000 bins, log-scaled) of the ``(0, 0)`` data block is saved as a
    PNG next to the input file:

    * ``hm_file + "stage1.png"`` - raw data as loaded;
    * ``hm_file + "stage2.png"`` - after removing the diagonal;
    * ``hm_file + "stage3.png"`` - after poor-region removal and iterative
      correction.

    The corrected data is exported to ``hm_file + ".IC." + mode + ".hdf5"``.

    Note that the PNG suffixes are appended directly to ``hm_file`` with no
    separator. The function relies on ``np``, ``genome_db`` and
    ``extractResolutionFromFileName`` being available at module level.

    Args:
        mode: Passed through to ``HiResHiC.loadData`` and embedded in the
            output file name.
        hm_file: Path of the heatmap file; the bin resolution is parsed from
            this name by ``extractResolutionFromFileName``.

    Raises:
        ValueError: If no resolution can be extracted from ``hm_file``.
    """
    from hiclib import highResBinnedData

    resolution = extractResolutionFromFileName(hm_file)
    if resolution is None:
        # A bare ``raise`` has no active exception to re-raise here and only
        # produced an unrelated error; report the actual problem instead.
        raise ValueError(
            "cannot extract resolution from heatmap file name: %r" % (hm_file,))

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print("creating an object")
    hmap = highResBinnedData.HiResHiC(genome_db, resolution)

    print("loading data")
    hmap.loadData(hm_file, mode=mode)

    print("saving pict of heatmap")
    import matplotlib
    # Select the non-interactive backend before pyplot is imported so the
    # function can run without a display.
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from mirnylib import plotting

    # Snapshots are taken from the (0, 0) block only
    # (presumably chromosome 0 against itself - confirm against HiResHiC).
    chr0array = hmap.data[(0, 0)].getData()
    maxlen = min(10000, len(chr0array))

    def _save_stage(matrix, suffix):
        """Plot the log of the corner of ``matrix`` to ``hm_file + suffix``."""
        to_plot = matrix[0:maxlen, 0:maxlen]
        figure_path = hm_file + suffix
        print("saving  " + figure_path)
        plotting.plot_matrix(np.log(to_plot))
        plt.subplots_adjust(bottom=0.15)
        # savefig opens and closes the target itself; the extra file handle the
        # original opened was never written to and leaked on errors.
        plt.savefig(figure_path, dpi=300)
        plt.clf()

    _save_stage(chr0array, "stage1.png")

    # Remove the contacts between loci located within the same bin +/- 1 bin.
    hmap.removeDiagonal(m=1)

    # Fetch the block again for each snapshot: the steps above/below modify it.
    _save_stage(hmap.data[(0, 0)].getData(), "stage2.png")

    # Removes 0.5 percent of regions with low coverage.
    hmap.removePoorRegions(percent=0.5)

    # Perform iterative correction.
    hmap.iterativeCorrection()

    _save_stage(hmap.data[(0, 0)].getData(), "stage3.png")

    # Save the iteratively corrected heatmap.
    hmap.export(hm_file + ".IC." + mode + ".hdf5")