import argparse
import gzip
import os
import re
import subprocess as subp

import hifive


def main():
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--nodes')
    parser.add_argument('--partition')
    parser.add_argument('--resolution', type=int)
    parser.add_argument('--re', action='store_true')
    parser.add_argument('--subset_chromosomes', default='NA')
    args = parser.parse_args()

    # Sort the node (bin) coordinates, optionally keep only a subset of
    # chromosomes, and strip the "chr" prefix before handing the BED file to hifive.
    nodes_modified_file = args.partition + '.tmp'
    subp.check_output(['bash', '-c',
                       'zcat -f ' + args.nodes + ' | sort -k1,1 -k2,2n | gzip > ' + nodes_modified_file + '.sorted'])
    nodes_modified = open(nodes_modified_file, 'w')
    for line in gzip.open(nodes_modified_file + '.sorted', 'r'):
        items = line.strip().split('\t')
        chromo, start, end, name = items[0], items[1], items[2], items[3]
        if args.subset_chromosomes != 'NA':
            if chromo not in args.subset_chromosomes.split(','):
                continue
        nodes_modified.write(re.sub('chr', '', chromo) + '\t' + start + '\t' + end + '\t' + name + '\n')
    nodes_modified.close()

    if args.re:
        # restriction fragments
        myfends = hifive.Fend(args.partition, mode='w')
        myfends.load_fends(nodes_modified_file, format='bed')
    else:
        # uniform bins
        myfends = hifive.Fend(args.partition, mode='w', binned=int(args.resolution))
        myfends.load_bins(nodes_modified_file, format='bed')
    myfends.save()

    os.remove(nodes_modified_file)
    os.remove(nodes_modified_file + '.sorted')


if __name__ == '__main__':
    main()
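# For illustration only: a hypothetical command line for the script above
# (the script name and file names are assumptions, not from the source).
# Binned mode at 40 kb resolution might look like:
#
#   python make_hifive_partition.py \
#       --nodes bins.bed.gz \
#       --partition bins.fends.hdf5 \
#       --resolution 40000
#
# Passing --re instead treats the sorted BED records as restriction fragments
# and loads them with load_fends() rather than load_bins().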
#!/usr/bin/env python2.7
"""
Example: $ python hifive_processing.py alignments.raw name
"""
import hifive
import sys

rawAlign, name = sys.argv[1], sys.argv[2]  # name will be the prefix of output files

## Load in the restriction enzyme digested fend coordinates
fend = hifive.Fend('%s_fend.hdf5' % (name), mode='w')
fend.load_fends('../ce10nm2.bed', genome_name='ce10', re_name='DpnII', format='bed')
fend.save()

## Load in the read data
data = hifive.HiCData('%s_data.hdf5' % (name), mode='w')
data.load_data_from_bam('%s_fend.hdf5' % (name), rawAlign, maxinsert=500)
data.save()

## Create a HiC object
hic = hifive.HiC('%s_hic.hdf5' % (name), 'w')
hic.load_data('%s_data.hdf5' % (name))
hic.save()
import hifive
import sys
import os

bam1 = sys.argv[1]
bam2 = sys.argv[2]
RE_bed = sys.argv[3]
outdir = sys.argv[4]
# RE_bed='/resources/HindIII_hg19_liftover.bed'

if not os.path.exists(outdir):
    os.mkdir(outdir)

# Creating a Fend object
fend = hifive.Fend(outdir + '/fend_object.hdf5', mode='w')
fend.load_fends(RE_bed, re_name='RE', format='bed')
fend.save()

# Creating a HiCData object
data = hifive.HiCData(outdir + '/HiC_data_object.hdf5', mode='w')
data.load_data_from_bam(outdir + '/fend_object.hdf5', [bam1, bam2], maxinsert=500)
data.save()

# Creating a HiC Project object
hic = hifive.HiC(outdir + '/HiC_project_object.hdf5', 'w')
hic.load_data(outdir + '/HiC_data_object.hdf5')
hic.save()

# Filtering HiC fends
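# For illustration only: the script above expects four positional arguments;
# the file and directory names below are assumptions, not from the source.
#
#   python hifive_from_bams.py sample_1.bam sample_2.bam HindIII_hg19_liftover.bed hifive_out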
def run_hifive(self, parameters):
    fend_file = parameters['fend_file']
    bam_file_1 = parameters['bam_file_1']
    bam_file_2 = parameters['bam_file_2']
    model = parameters['model']
    restriction_enzymes = map(
        str, parameters['restriction_enzyme'].strip('[]').split(','))
    if len(restriction_enzymes) == 1:
        restriction_enzyme = restriction_enzymes[0]
    else:
        restriction_enzyme = ','.join(restriction_enzymes)

    if model == 'Yaffe-Tanay':
        # Creating a Fend object
        fend = hifive.Fend('fend_object.hdf5', mode='w')
        fend.load_fends(fend_file, re_name=restriction_enzyme, format='bed')
        fend.save()

        # Creating a HiCData object
        data = hifive.HiCData('HiC_data_object.hdf5', mode='w')
        data.load_data_from_bam('fend_object.hdf5',
                                [bam_file_1, bam_file_2],
                                maxinsert=500,
                                skip_duplicate_filtering=False)
        data.save()

        # Creating a HiC Project object
        hic = hifive.HiC('HiC_project_object.hdf5', 'w')
        hic.load_data('HiC_data_object.hdf5')
        hic.save()

        # Filtering HiC fends
        hic = hifive.HiC('HiC_project_object.hdf5')
        hic.filter_fends(mininteractions=1, mindistance=0, maxdistance=0)
        hic.save()

        # Finding HiC distance function
        hic = hifive.HiC('HiC_project_object.hdf5')
        hic.find_distance_parameters(numbins=90, minsize=200, maxsize=0)
        hic.save()

        # Learning correction parameters using the binning algorithm
        hic = hifive.HiC('HiC_project_object.hdf5')
        hic.find_binning_fend_corrections(
            max_iterations=1000,
            mindistance=500000,
            maxdistance=0,
            num_bins=[20, 20, 20, 20],
            model=['len', 'distance', 'gc', 'mappability'],
            parameters=['even', 'even', 'even', 'even'],
            usereads='cis',
            learning_threshold=1.0)
        hic.save('HiC_norm_binning.hdf5')

    elif model == 'Hi-Corrector':
        # Creating a Fend object
        fend = hifive.Fend('fend_object.hdf5', mode='w')
        fend.load_fends(fend_file, re_name=restriction_enzyme, format='bed')
        fend.save()

        # Creating a HiCData object
        data = hifive.HiCData('HiC_data_object.hdf5', mode='w')
        data.load_data_from_bam('fend_object.hdf5',
                                [bam_file_1, bam_file_2],
                                maxinsert=500,
                                skip_duplicate_filtering=False)
        data.save()

        # Creating a HiC Project object
        hic = hifive.HiC('HiC_project_object.hdf5', 'w')
        hic.load_data('HiC_data_object.hdf5')
        hic.save()
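# Minimal sketch of the parameters dict this method reads, inferred only from
# the keys accessed above; the concrete values are placeholder assumptions.
example_parameters = {
    'fend_file': 'HindIII_hg19.bed',    # BED file of restriction fragments
    'bam_file_1': 'sample_1.bam',       # first mate alignments
    'bam_file_2': 'sample_2.bam',       # second mate alignments
    'model': 'Yaffe-Tanay',             # or 'Hi-Corrector'
    'restriction_enzyme': '[HindIII]',  # bracketed, comma-separated list
}
# pipeline.run_hifive(example_parameters)  # hypothetical caller object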
def run_hifive(self, parameters):
    fend_file = parameters['fend_file']
    bam_file_1 = parameters['bam_file_1']
    bam_file_2 = parameters['bam_file_2']
    model = parameters['model']
    add_gc = bool(parameters['add_gc'])
    add_mappability = bool(parameters['add_mappability'])
    restriction_enzymes = map(
        str, parameters['restriction_enzyme'].strip('[]').split(','))
    if len(restriction_enzymes) == 1:
        restriction_enzyme = restriction_enzymes[0]
    else:
        restriction_enzyme = ','.join(restriction_enzymes)

    # Run for both models
    if not os.path.isfile('HiC_project_object.hdf5'):
        fend = hifive.Fend('fend_object.hdf5', mode='w')
        fend.load_fends(fend_file, re_name=restriction_enzyme, format='bed')
        fend.save()

        # Creating a HiCData object
        data = hifive.HiCData('HiC_data_object.hdf5', mode='w')
        data.load_data_from_bam('fend_object.hdf5',
                                [bam_file_1, bam_file_2],
                                maxinsert=500,
                                skip_duplicate_filtering=False)
        data.save()

        # Creating a HiC Project object
        hic = hifive.HiC('HiC_project_object.hdf5', 'w')
        hic.load_data('HiC_data_object.hdf5')
        hic.save()

    if model == 'Yaffe-Tanay':
        if not os.path.isfile('HiC_norm_binning.hdf5'):
            # Filtering HiC fends
            hic = hifive.HiC('HiC_project_object.hdf5')
            hic.filter_fends(mininteractions=1, mindistance=0, maxdistance=0)

            # Finding HiC distance function
            hic.find_distance_parameters(numbins=90, minsize=200, maxsize=0)
            hic.save('HiC_project_object_with_distance_parameters.hdf5')

            # Learning correction parameters using the binning algorithm
            my_model = ['len', 'distance']
            if add_gc:
                my_model.append('gc')
            if add_mappability:
                my_model.append('mappability')
            my_num_bins = [20] * len(my_model)
            my_parameters = ['even'] * len(my_model)
            hic.find_binning_fend_corrections(max_iterations=1000,
                                              mindistance=500000,
                                              maxdistance=0,
                                              num_bins=my_num_bins,
                                              model=my_model,
                                              parameters=my_parameters,
                                              usereads='cis',
                                              learning_threshold=1.0)
            hic.save('HiC_norm_binning.hdf5')
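# Compared with the previous variant, this one also reads 'add_gc' and
# 'add_mappability' and skips steps whose output HDF5 files already exist, so
# it can be re-run after an interruption. A hypothetical call (all values are
# placeholder assumptions, not from the source):
example_parameters = {
    'fend_file': 'HindIII_hg19.bed',
    'bam_file_1': 'sample_1.bam',
    'bam_file_2': 'sample_2.bam',
    'model': 'Yaffe-Tanay',
    'restriction_enzyme': '[HindIII]',
    'add_gc': True,           # include GC content in the binning model
    'add_mappability': True,  # include mappability in the binning model
}
# pipeline.run_hifive(example_parameters)  # hypothetical caller object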