genome_db_contig = genome_db_chrmLevel #hm_file = "/mnt/storage/home/vsfishman/HiC/tutorial_Fishman/chick/mapped-GalGal5filtered/GalGal5filtered/ChEF-all-HindIII-100k.hm.IC" #second_hm_file = "/mnt/storage/home/vsfishman/HiC/tutorial_Fishman/chick/mapped-GalGal5filtered/GalGal5filtered/Blood-all-HindIII-100k.hm.IC" ########################WRITE YOUR HEATMAP HERE######################## hm_file = "/mnt/storage/home/vsfishman/HiC/tutorial_Fishman/chick/mapped-GalGal5filtered/GalGal5filteredChrmLevel/ChEF-all-HindIII-40k.hm.IC" domains_files_Arm = "mapped-GalGal5filtered/GalGal5filteredChrmLevel/ChEF-all-HindIII-40k.hm.gzipped_matrix/ChEF-all-HindIII-40k.hm.gzipped_matrix.jucebox_domains.annotation" domains_files_Dix = "/mnt/storage/home/vsfishman/HiC/data/chick/DixonDomainsChEF_all_HindIII_40k.hm.IC_domains_40KB/DixonDomainsChEF_all_HindIII_40k.hm.IC_domains_40KB.jucebox_domains.annotation" second_hm_file = "/mnt/storage/home/vsfishman/HiC/tutorial_Fishman/chick/mapped-GalGal5filtered/GalGal5filteredChrmLevel/Blood-all-HindIII-40k.hm.IC" second_domains_files_Arm = "mapped-GalGal5filtered/GalGal5filteredChrmLevel/Blood-all-HindIII-40k.hm.gzipped_matrix/Blood-all-HindIII-40k.hm.gzipped_matrix.jucebox_domains.annotation" second_domains_files_Dix = "/mnt/storage/home/vsfishman/HiC/data/chick/DixonDomainsBlood_all_HindIII_40k.hm.IC_domains_40KB/DixonDomainsBlood_all_HindIII_40k.hm.IC_domains_40KB.jucebox_domains.annotation" resolution = extractResolutionFromFileName(hm_file) assert resolution == extractResolutionFromFileName(second_hm_file) data_dict = {} # a structure to keep chrms arrays data_dict_second_hmap = {} # a structure to keep chrms arrays for chrm,array in enumerate(get_chromosomes(hm_file,genome_db_contig,resolution)): data_dict[chrm] = array for chrm,array in enumerate(get_chromosomes(second_hm_file,genome_db_contig,resolution)): data_dict_second_hmap[chrm] = array #chrm,nt_start,nt_end,title #######################################ADD REGIONS TO PLOT HERE 
###EXAMPLE: ###points = [
# Make getIntraChrHeatmaps (located in the mESC tutorial directory) importable.
sys.path.append("/mnt/storage/home/vsfishman/HiC/tutorial_Fishman/mESC")
from getIntraChrHeatmaps import get_chromosomes, extractResolutionFromFileName

# Genome object built from the GalGal5 assembled-chromosome FASTA directory;
# chromosome files are matched with the "%s.fna" template.
genome_db_chrmLevel = genome.Genome(
    "/mnt/storage/home/vsfishman/HiC/fasta/GalGal5/GCF_000002315.4_Gallus_gallus-5.0_assembly_structure/Primary_Assembly/assembled_chromosomes/FASTA/",
    readChrms=[],
    chrmFileTemplate="%s.fna")

# Input heatmap; the output path is the same name with '.eig' appended.
hm_file = "/mnt/storage/home/vsfishman/HiC/tutorial_Fishman/chick/mapped-GalGal5filtered/GalGal5filteredChrmLevel/ChEF-all-HindIII-100k.hm"
f_out_path = hm_file + '.eig'
NumEigenvectors = 1  # number of eigenvectors to compute

# Read resolution from one of the datasets
resolution = extractResolutionFromFileName(hm_file)

# Define the binnedData object, load data
BD = binnedData(resolution, genome_db_chrmLevel)
BD.simpleLoad(hm_file, 'heatmap')

BD.removeDiagonal()

# Remove bins with less than half of a bin sequenced
BD.removeBySequencedCount(0.5)

# We'll do iterative correction and Eigenvector expansion on trans data only!
# We want to remove cis, because later we want to remove poor regions in trans
BD.removeCis()

# Truncate top 0.05% of interchromosomal counts (possibly, PCR blowouts)
def filter_bychr_heatmap(hm_file):
    """Filter and iteratively correct a heatmap, saving a plot after each stage.

    The heatmap stored in ``hm_file`` is loaded into a ``binnedData`` object
    (using the module-level ``genome_db``), filtered, iteratively corrected
    and exported to ``hm_file + ".IC.hdf5"``.

    Three diagnostic pictures of the top-left corner of the matrix (at most
    10000 x 10000 bins, log-scaled) are written next to the input file:

    * ``hm_file + "stage1.png"`` -- raw data as loaded;
    * ``hm_file + "stage2.png"`` -- after the diagonal / coverage / trans filters;
    * ``hm_file + "stage3.png"`` -- after iterative correction.

    Parameters:
        hm_file: path to the heatmap file; the resolution is parsed from
            this name by ``extractResolutionFromFileName``.

    Raises:
        ValueError: if no resolution can be extracted from ``hm_file``.
    """
    resolution = extractResolutionFromFileName(hm_file)
    if resolution is None:
        # A bare ``raise`` here had no active exception to re-raise and only
        # produced an unrelated, confusing error.
        raise ValueError(
            "cannot extract resolution from heatmap file name: %r" % (hm_file,))

    from hiclib import binnedData

    # Create a object, load the data.
    print("creating an object")
    hmap = binnedData.binnedData(resolution, genome_db)
    print("loading data")
    hmap.simpleLoad(hm_file, "heatmap")

    print("saving pict of heatmap")
    import matplotlib
    # Select the non-interactive backend before pyplot is imported.
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from mirnylib import plotting

    # The plotted corner size is fixed once, from the freshly loaded heatmap.
    maxlen = min(10000, len(hmap.dataDict["heatmap"]))

    def save_stage_plot(suffix, dpi):
        # Plot log(counts) of the current top-left corner to hm_file + suffix.
        corner = hmap.dataDict["heatmap"][0:maxlen, 0:maxlen]
        figure_path = hm_file + suffix
        print("saving  %s" % figure_path)
        plotting.plot_matrix(np.log(corner))
        plt.subplots_adjust(bottom=0.15)
        # savefig opens and closes the target itself; no separate file
        # handle is needed (the old one was never written to and leaked on
        # errors).
        plt.savefig(figure_path, dpi=dpi)
        plt.clf()

    save_stage_plot("stage1.png", 600)

    # Remove the contacts between loci located within the same bin +/- 1 bin.
    hmap.removeDiagonal(m=1)

    # new filter: omit all bins with less than 0.5 coverage by sequenced
    # bases (i.e. bases present in the genome)
    hmap.removeBySequencedCount()

    # remove .5% bins with the lowest number of records (i.e. non-zero
    # entries in the matrix).
    # This filter was updated to remove bins which have zero contacts and one
    # PCR blowout. Those bins would have many reads, but all reads will be
    # with one or few other bins.
    hmap.removePoorRegions(cutoff=0.5, coverage=True)

    hmap.truncTrans()  # remove PCR blowouts from trans data

    # NOTE(review): stage 2 is saved at dpi=200 while stages 1 and 3 use 600;
    # kept as in the original.
    save_stage_plot("stage2.png", 200)

    hmap.iterativeCorrectWithoutSS(force=True)  # do iterative correction

    save_stage_plot("stage3.png", 600)

    # Save the iteratively corrected heatmap.
    hmap.export("heatmap", hm_file + ".IC.hdf5", False)
from mirnylib import genome
from mirnylib import h5dict
from mirnylib import plotting
from hiclib import binnedData
from hiclib import fragmentHiC
import math

# Genome object built from the GalGal5 assembled-chromosome FASTA directory;
# chromosome files are matched with the "%s.fna" template.
genome_db = genome.Genome(
    "/mnt/storage/home/vsfishman/HiC/fasta/GalGal5/GCF_000002315.4_Gallus_gallus-5.0_assembly_structure/Primary_Assembly/assembled_chromosomes/FASTA/",
    readChrms=[],
    chrmFileTemplate="%s.fna")

# The heatmap file is the first command-line argument; its resolution is
# parsed from the file name.
hm_file = sys.argv[1]
figure_path = "/mnt/storage/home/vsfishman/HiC/pics/"
domain_res = extractResolutionFromFileName(hm_file)

all_chrms = get_chromosomes(hm_file, genome_db, domain_res)
chrms = range(genome_db.chrmCount)  # number of chrms in genome
st = genome_db.chrmStartsBinCont  # array of numbers of chrms start (in bins)
# array of numbers of chrms ends (in bins). Number end[-1] is not in
# chromosome (this number is 1st bin of next chrm)
end = genome_db.chrmEndsBinCont


def plot_one_chr_fragment(matrix, figure_path, chr_name, ch_start, ch_end):
    print "Plotting picture"
    # Scale both coordinates by the heatmap resolution (presumably
    # nucleotide positions -> bin indices; TODO confirm against callers).
    ch_start = ch_start / domain_res
    ch_end = ch_end / domain_res
    # domain_st = domain_st / domain_res
    # domain_end = domain_end / domain_res
    # Look up the chromosome's index by label and take its entry of `matrix`.
    i = genome_db.label2idx[chr_name]
    q2_0 = matrix[i]
def filter_hires_heatmap(mode="cis", hm_file=""):
    """Filter and iteratively correct a high-resolution heatmap, plotting each stage.

    The data in ``hm_file`` is loaded into a ``HiResHiC`` object (using the
    module-level ``genome_db``), filtered, iteratively corrected and exported
    to ``hm_file + ".IC." + mode + ".hdf5"``.

    Three diagnostic pictures of the top-left corner (at most 10000 x 10000
    bins, log-scaled) of the ``(0, 0)`` matrix are written next to the input
    file:

    * ``hm_file + "stage1.png"`` -- raw data as loaded;
    * ``hm_file + "stage2.png"`` -- after diagonal removal;
    * ``hm_file + "stage3.png"`` -- after poor-region removal and iterative
      correction.

    Parameters:
        mode: passed through to ``HiResHiC.loadData`` and embedded in the
            name of the exported file.
        hm_file: path to the heatmap file; the resolution is parsed from
            this name by ``extractResolutionFromFileName``.

    Raises:
        ValueError: if no resolution can be extracted from ``hm_file``.
    """
    from hiclib import highResBinnedData

    resolution = extractResolutionFromFileName(hm_file)
    if resolution is None:
        # A bare ``raise`` here had no active exception to re-raise and only
        # produced an unrelated, confusing error.
        raise ValueError(
            "cannot extract resolution from heatmap file name: %r" % (hm_file,))

    # Create a object, load the data.
    print("creating an object")
    hmap = highResBinnedData.HiResHiC(genome_db, resolution)
    print("loading data")
    hmap.loadData(hm_file, mode=mode)

    print("saving pict of heatmap")
    import matplotlib
    # Select the non-interactive backend before pyplot is imported.
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from mirnylib import plotting

    # The plotted corner size is fixed once, from the freshly loaded data.
    chr0array = hmap.data[(0, 0)].getData()
    maxlen = min(10000, len(chr0array))

    def save_stage_plot(matrix, suffix):
        # Plot log(counts) of the top-left corner of `matrix` to hm_file + suffix.
        to_plot = matrix[0:maxlen, 0:maxlen]
        figure_path = hm_file + suffix
        print("saving  %s" % figure_path)
        plotting.plot_matrix(np.log(to_plot))
        plt.subplots_adjust(bottom=0.15)
        # savefig opens and closes the target itself; no separate file
        # handle is needed (the old one was never written to and leaked on
        # errors).
        plt.savefig(figure_path, dpi=300)
        plt.clf()

    save_stage_plot(chr0array, "stage1.png")

    # Remove the contacts between loci located within the same bin +/- 1 bin.
    hmap.removeDiagonal(m=1)

    save_stage_plot(hmap.data[(0, 0)].getData(), "stage2.png")

    # Removes 0.5 percent of regions with low coverage.
    hmap.removePoorRegions(percent=0.5)

    # Perform iterative correction.
    hmap.iterativeCorrection()

    save_stage_plot(hmap.data[(0, 0)].getData(), "stage3.png")

    # Save the iteratively corrected heatmap.
    hmap.export(hm_file + ".IC." + mode + ".hdf5")