def main():
    """Filter per-strain GFFs and print pairwise counts of unshared/shared records.

    Command-line options:
        -i/--input   directory searched with ``*/repeat/results/*.high_conf.gff``
        -o/--output  accepted but not used by this function
        -v           sets ``args.verbose``; not used by this function

    For every GFF found, lines containing "ping" are removed with ``grep -v``
    and the result is written to ``gff/<accession>.gff``.  Each ordered pair of
    filtered files is then compared with ``bedtools window -w 100`` and one
    tab-separated line is printed per pair:
    ``strain1  strain2  only_in_1  only_in_2  overlapping``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input')
    parser.add_argument('-o', '--output')
    parser.add_argument('-v', dest='verbose', action='store_true')
    args = parser.parse_args()

    # A missing *or empty* --input is a usage error.  The previous
    # ``try: len(...) except:`` form let an empty string through.
    if not args.input:
        usage()
        sys.exit(2)

    name_dict = name2acc('rice_line_IRRI_2466.download.list')

    # Example layout of one matched path:
    # HEG4_group_RelocaTEi/ERS467761_RelocaTEi/repeat/results/ALL.all_nonref_insert.high_conf.gff
    found = glob.glob('%s/*/repeat/results/*.high_conf.gff' % (args.input))
    createdir('gff')
    for gff in found:
        dirs = re.split(r'/', gff)
        # The sample directory sits four levels above the file name.
        name = re.sub(r'_RelocaTEi', r'', dirs[-4])
        os.system('grep -v "ping" %s > gff/%s.gff' %
                  (os.path.abspath(gff), name_dict[name]))

    # Pairwise difference of TE calls.
    gffs = glob.glob('gff/*.gff')
    strains = [os.path.splitext(os.path.split(path)[1])[0] for path in gffs]
    # The filtered files do not change during the comparison, so count each once.
    totals = [line_num(path) for path in gffs]

    data = {}
    for i, strain1 in enumerate(strains):
        row = data.setdefault(strain1, {})
        for j, strain2 in enumerate(strains):
            if i == j:
                continue  # self comparisons are never reported
            os.system('bedtools window -w 100 -a %s -b %s > temp.overlap' %
                      (gffs[i], gffs[j]))
            shared = line_num('temp.overlap')
            row[strain2] = [totals[i] - shared, totals[j] - shared, shared]

    for s1 in sorted(data):
        for s2 in sorted(data[s1]):
            counts = data[s1][s2]
            print('%s\t%s\t%s\t%s\t%s' %
                  (s1, s2, counts[0], counts[1], counts[2]))
def main():
    """Process every ``*.csv`` in a directory with GFF genotypes and Ping codes.

    Command-line options:
        -c/--csv        directory containing the ``*.csv`` files (required)
        -g/--gff        GFF file passed to ``readgff``
        -p/--ping_code  file passed to ``read_ping``
        -o/--output     output directory; defaults to
                        ``mPing_boundary_mPing_GT_Ping_code``
        -v              sets ``args.verbose``; not used by this function
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--csv')
    parser.add_argument('-g', '--gff')
    parser.add_argument('-p', '--ping_code')
    parser.add_argument('-o', '--output')
    parser.add_argument('-v', dest='verbose', action='store_true')
    args = parser.parse_args()

    # A missing *or empty* --csv is a usage error.  The previous
    # ``try: len(...) except:`` form let an empty string through.
    if not args.csv:
        usage()
        sys.exit(2)

    # Output directory.
    if not args.output:
        args.output = 'mPing_boundary_mPing_GT_Ping_code'
    createdir(args.output)

    # Read the original GFF to determine whether an insertion is shared or
    # reference only.
    locus_gt = readgff(args.gff)

    # Read the Ping code table.
    ping_code = read_ping(args.ping_code)

    # Read and write each CSV.  The loop variable is deliberately not called
    # ``csv`` so it cannot shadow the standard-library module of that name.
    for csv_path in glob.glob('%s/*.csv' % (args.csv)):
        read_csv(csv_path, ping_code, locus_gt, args.output)
def main():
    """Process every ``*.csv`` in a directory with the Ping code table.

    Command-line options:
        -c/--csv        directory containing the ``*.csv`` files (required)
        -p/--ping_code  file passed to ``read_ping``
        -o/--output     output directory; defaults to
                        ``mPing_boundary_mPing_GT_Ping_code``
        -v              sets ``args.verbose``; not used by this function
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--csv')
    parser.add_argument('-p', '--ping_code')
    parser.add_argument('-o', '--output')
    parser.add_argument('-v', dest='verbose', action='store_true')
    args = parser.parse_args()

    # A missing *or empty* --csv is a usage error.  The previous
    # ``try: len(...) except:`` form let an empty string through.
    if not args.csv:
        usage()
        sys.exit(2)

    # Output directory.
    if not args.output:
        args.output = 'mPing_boundary_mPing_GT_Ping_code'
    createdir(args.output)

    # Read the Ping code table.
    ping_code = read_ping(args.ping_code)

    # Read and write each CSV.  The loop variable is deliberately not called
    # ``csv`` so it cannot shadow the standard-library module of that name.
    for csv_path in glob.glob('%s/*.csv' % (args.csv)):
        read_csv(csv_path, ping_code, args.output)
def pong_3k(infile):
    """Write ``pong_fastq.sh`` with download + read-extraction commands.

    *infile* is a tab-separated list whose first column is an accession.
    Lines of two characters or fewer and lines starting with ``Taxa`` are
    skipped.  For every accession whose name-sorted BAM
    (``pong_3k/<acc>.pong.byname.bam``) is missing or smaller than 10000
    bytes, the accession is printed and eleven shell commands are appended to
    ``pong_fastq.sh``: wget the BAM, index it, extract the header plus five
    hard-coded regions into a SAM, convert to BAM, sort by name and convert to
    paired FASTQ.

    The script is only written, not executed, by this function.
    """
    samtools = '/opt/linux/centos/7.x/x86_64/pkgs/samtools/1.3/bin/samtools'
    bedtools = '/rhome/cjinfeng/BigData/software/bedtools2-2.19.0/bin/bedtools'
    regions = (
        'chr11:11434715-11443880',
        'chr02:19902309-19911474',
        'chr06:12413640-12427974',
        'chr06:21718706-21727871',
        'chr09:11300206-11309371',
    )

    # ``with`` guarantees the script is flushed and closed even if a line of
    # the input raises part-way through.
    with open('pong_fastq.sh', 'w') as ofile:
        with open(infile, 'r') as filehd:
            for line in filehd:
                line = line.rstrip()
                if len(line) <= 2 or line.startswith('Taxa'):
                    continue
                acc = line.split('\t')[0]
                url = 'http://s3.amazonaws.com/3kricegenome/Nipponbare/%s.realigned.bam' % (acc)

                outdir = os.path.abspath('pong_3k')
                createdir(outdir)
                createdir('%s/%s_pong' % (outdir, acc))
                local_bam = '%s/%s.realigned.bam' % (outdir, acc)
                pong_sam = '%s/%s.pong.sam' % (outdir, acc)
                pong_bam = '%s/%s.pong.bam' % (outdir, acc)
                pong_sort_bam = '%s/%s.pong.byname.bam' % (outdir, acc)

                # Skip accessions that already have a plausibly complete result.
                if (os.path.exists(pong_sort_bam)
                        and os.path.getsize(pong_sort_bam) >= 10000):
                    continue

                print(acc)
                commands = [
                    'wget %s -O %s' % (url, os.path.abspath(local_bam)),
                    '%s index %s' % (samtools, os.path.abspath(local_bam)),
                    # Header first, then each region is appended to the same SAM.
                    '%s view -H %s > %s' % (samtools, local_bam, pong_sam),
                ]
                for region in regions:
                    commands.append('%s view %s %s >> %s' %
                                    (samtools, local_bam, region, pong_sam))
                commands.append('%s view -Sb %s > %s' %
                                (samtools, pong_sam, pong_bam))
                commands.append('%s sort -n %s -o %s' %
                                (samtools, pong_bam, pong_sort_bam))
                commands.append(
                    '%s bamtofastq -i %s -fq %s/%s_pong/%s_Pong_1.fq -fq2 %s/%s_pong/%s_Pong_2.fq' %
                    (bedtools, pong_sort_bam, outdir, acc, acc, outdir, acc, acc))
                ofile.write('\n'.join(commands) + '\n')
def update_RIL(infile, illumina, genotype, raw, bam_dir):
    """Create symlinks for the FASTQ and BAM files listed in *infile*.

    *infile* is tab-separated; lines of two characters or fewer and lines
    starting with ``#`` are skipped.  Columns used (0-based): 0 and 1 go into
    the ``FC<0>L<1>`` part of the link prefix, 2 is the raw FASTQ base name,
    4 is the middle part of the prefix, 5 is the RIL id (``RIL`` is prepended
    when absent).

    For each row the function
      * creates ``<illumina>/<ril>`` via ``createdir``,
      * reports raw FASTQ files that are not found,
      * links raw FASTQ -> ``<raw>/<prefix>_p[12].fq``,
      * links those -> ``<illumina>/<ril>/<prefix>_p[12].fq``,
      * links ``<bam_dir>/<prefix>.*`` into ``<genotype>`` when
        ``<genotype>/<prefix>.recal.bai`` is not a file.

    Returns the ``defaultdict(str)`` created at the start; nothing in this
    function adds entries to it.
    """
    data = defaultdict(str)
    with open(infile, 'r') as handle:
        for record in handle:
            record = record.rstrip()
            if len(record) <= 2 or record.startswith('#'):
                continue
            fields = record.split('\t')

            ril_id = fields[5]
            if not ril_id.startswith('RIL'):
                ril_id = 'RIL%s' % (ril_id)
            createdir('%s/%s' % (illumina, ril_id))

            # Raw FASTQ names: "flowcell_*" libraries use a pair1/pair2 infix,
            # everything else uses the _R1/_R2 suffix.
            if fields[2].startswith('flowcell'):
                parts = fields[2].split('_')
                raw_pair = [
                    '%s/%s.fastq' % (raw, '_'.join([parts[0], parts[1], mate, parts[2]]))
                    for mate in ('pair1', 'pair2')
                ]
            else:
                library = fields[2].rstrip()
                raw_pair = [
                    '%s/%s_R1.fastq' % (raw, library),
                    '%s/%s_R2.fastq' % (raw, library),
                ]
            for raw_fq in raw_pair:
                if not os.path.isfile(raw_fq):
                    print('file not found: %s' % (raw_fq))

            # Links named by RIL id and barcode, next to the raw files.
            prefix = '%s_%s_FC%sL%s' % (ril_id, fields[4], fields[0], fields[1])
            named_pair = ['%s/%s_p%s.fq' % (raw, prefix, mate) for mate in (1, 2)]
            for raw_fq, named_fq in zip(raw_pair, named_pair):
                if not os.path.isfile(named_fq):
                    os.system('ln -s %s %s' % (raw_fq, named_fq))

            # Links inside the per-RIL illumina directory.
            for mate, named_fq in zip((1, 2), named_pair):
                target_fq = '%s/%s/%s_p%s.fq' % (illumina, ril_id, prefix, mate)
                if not os.path.isfile(target_fq):
                    print(target_fq)
                    os.system('ln -s %s %s' % (named_fq, target_fq))

            # BAM and companion files: everything matching the prefix is linked
            # into the genotype directory when the .recal.bai is missing there.
            bam_target = '%s/%s.recal.bai' % (genotype, prefix)
            if not os.path.isfile(bam_target):
                print(bam_target)
                os.system('ln -s %s %s' %
                          ('%s/%s.*' % (bam_dir, prefix), '%s' % (genotype)))
    return data
def ping_3k(infile):
    """Write ``ping_fastq.sh`` and submit it when it contains any work.

    *infile* is a tab-separated list whose first column is an accession.
    Lines of two characters or fewer and lines starting with ``Taxa`` are
    skipped.  For every accession whose name-sorted BAM
    (``ping_3k/<acc>.ping.byname.bam``) is missing or empty, five shell
    commands are appended to the script.  They read the BAM from its S3 URL:
    extract the header plus one hard-coded region into a SAM, convert to BAM,
    sort by name and convert to paired FASTQ.

    If at least one accession was written, ``runjob('ping_fastq.sh', 5)`` is
    called after the script has been closed.
    """
    samtools = '/opt/linux/centos/7.x/x86_64/pkgs/samtools/1.3/bin/samtools'
    bedtools = '/rhome/cjinfeng/BigData/software/bedtools2-2.19.0/bin/bedtools'
    count = 0

    # ``with`` guarantees the script is flushed and closed before runjob is
    # called, and also when a line of the input raises part-way through.
    with open('ping_fastq.sh', 'w') as ofile:
        with open(infile, 'r') as filehd:
            for line in filehd:
                line = line.rstrip()
                if len(line) <= 2 or line.startswith('Taxa'):
                    continue
                acc = line.split('\t')[0]
                bam = 'http://s3.amazonaws.com/3kricegenome/Nipponbare/%s.realigned.bam' % (acc)

                outdir = os.path.abspath('ping_3k')
                createdir(outdir)
                createdir('%s/%s_ping' % (outdir, acc))
                ping_sam = '%s/%s.ping.sam' % (outdir, acc)
                ping_bam = '%s/%s.ping.bam' % (outdir, acc)
                ping_sort_bam = '%s/%s.ping.byname.bam' % (outdir, acc)

                # Skip accessions that already have a non-empty result.
                if (os.path.exists(ping_sort_bam)
                        and os.path.getsize(ping_sort_bam) != 0):
                    continue

                commands = [
                    '%s view -H %s > %s' % (samtools, bam, ping_sam),
                    '%s view %s chr06:23519641-23528981 >> %s' % (samtools, bam, ping_sam),
                    '%s view -Sb %s > %s' % (samtools, ping_sam, ping_bam),
                    '%s sort -n %s -o %s' % (samtools, ping_bam, ping_sort_bam),
                    '%s bamtofastq -i %s -fq %s/%s_ping/%s_Ping_1.fq -fq2 %s/%s_ping/%s_Ping_2.fq' %
                    (bedtools, ping_sort_bam, outdir, acc, acc, outdir, acc, acc),
                ]
                ofile.write('\n'.join(commands) + '\n')
                count += 1

    if count > 0:
        # 5 = number of script lines per accession.
        runjob('ping_fastq.sh', 5)
def pong_3k(infile):
    """Write ``pong_fastq.sh`` and submit it when it contains any work.

    *infile* is a tab-separated list whose first column is an accession.
    Lines of two characters or fewer and lines starting with ``Taxa`` are
    skipped.  For every accession whose name-sorted BAM
    (``pong_3k/<acc>.pong.byname.bam``) is missing or empty, nine shell
    commands are appended to the script.  They read the BAM from its S3 URL:
    extract the header plus five hard-coded regions into a SAM, convert to
    BAM, sort by name and convert to paired FASTQ.

    If at least one accession was written, ``runjob('pong_fastq.sh', 9)`` is
    called after the script has been closed.
    """
    samtools = '/opt/linux/centos/7.x/x86_64/pkgs/samtools/1.3/bin/samtools'
    bedtools = '/rhome/cjinfeng/BigData/software/bedtools2-2.19.0/bin/bedtools'
    regions = (
        'chr11:11434715-11443880',
        'chr02:19902309-19911474',
        'chr06:12413640-12427974',
        'chr06:21718706-21727871',
        'chr09:11300206-11309371',
    )
    count = 0

    # ``with`` guarantees the script is flushed and closed before runjob is
    # called, and also when a line of the input raises part-way through.
    with open('pong_fastq.sh', 'w') as ofile:
        with open(infile, 'r') as filehd:
            for line in filehd:
                line = line.rstrip()
                if len(line) <= 2 or line.startswith('Taxa'):
                    continue
                acc = line.split('\t')[0]
                bam = 'http://s3.amazonaws.com/3kricegenome/Nipponbare/%s.realigned.bam' % (acc)

                outdir = os.path.abspath('pong_3k')
                createdir(outdir)
                createdir('%s/%s_pong' % (outdir, acc))
                pong_sam = '%s/%s.pong.sam' % (outdir, acc)
                pong_bam = '%s/%s.pong.bam' % (outdir, acc)
                pong_sort_bam = '%s/%s.pong.byname.bam' % (outdir, acc)

                # Skip accessions that already have a non-empty result.
                if (os.path.exists(pong_sort_bam)
                        and os.path.getsize(pong_sort_bam) != 0):
                    continue

                # Header first, then each region is appended to the same SAM.
                commands = ['%s view -H %s > %s' % (samtools, bam, pong_sam)]
                for region in regions:
                    commands.append('%s view %s %s >> %s' %
                                    (samtools, bam, region, pong_sam))
                commands.append('%s view -Sb %s > %s' %
                                (samtools, pong_sam, pong_bam))
                commands.append('%s sort -n %s -o %s' %
                                (samtools, pong_bam, pong_sort_bam))
                commands.append(
                    '%s bamtofastq -i %s -fq %s/%s_pong/%s_Pong_1.fq -fq2 %s/%s_pong/%s_Pong_2.fq' %
                    (bedtools, pong_sort_bam, outdir, acc, acc, outdir, acc, acc))
                ofile.write('\n'.join(commands) + '\n')
                count += 1

    if count > 0:
        # 9 = number of script lines per accession.
        runjob('pong_fastq.sh', 9)
def pong_3k(infile):
    """Write ``ping_fastq.sh`` (Ping region, local BAM) and submit it.

    Despite its name, this function writes ``ping_*`` outputs; the name is
    kept so existing callers keep working.

    *infile* is a tab-separated list whose first column is an accession.
    Lines of two characters or fewer and lines starting with ``Taxa`` are
    skipped.  For every accession whose name-sorted BAM
    (``ping_3k/<acc>.ping.byname.bam``) is missing or smaller than 2000
    bytes, the accession and current file size are printed and seven shell
    commands are appended to the script: symlink the BAM from ``pong_3k/``,
    index it, extract the header plus one hard-coded region into a SAM,
    convert to BAM, sort by name and convert to paired FASTQ.

    If at least one accession was written, ``runjob('ping_fastq.sh', 7)`` is
    called after the script has been closed.
    """
    samtools = '/opt/linux/centos/7.x/x86_64/pkgs/samtools/1.3/bin/samtools'
    bedtools = '/rhome/cjinfeng/BigData/software/bedtools2-2.19.0/bin/bedtools'
    count = 0

    # ``with`` guarantees the script is flushed and closed before runjob is
    # called, and also when a line of the input raises part-way through.
    with open('ping_fastq.sh', 'w') as ofile:
        with open(infile, 'r') as filehd:
            for line in filehd:
                line = line.rstrip()
                if len(line) <= 2 or line.startswith('Taxa'):
                    continue
                acc = line.split('\t')[0]

                outdir = os.path.abspath('ping_3k')
                createdir(outdir)
                createdir('%s/%s_ping' % (outdir, acc))
                local_bam = '%s/%s.realigned.bam' % (outdir, acc)
                ping_sam = '%s/%s.ping.sam' % (outdir, acc)
                ping_bam = '%s/%s.ping.bam' % (outdir, acc)
                ping_sort_bam = '%s/%s.ping.byname.bam' % (outdir, acc)

                # getsize() raises OSError on a missing file, so only ask for
                # the size when the file exists; a missing file reports 0.
                if os.path.exists(ping_sort_bam):
                    size = os.path.getsize(ping_sort_bam)
                    if size >= 2000:
                        continue
                else:
                    size = 0
                print('%s %s' % (acc, size))

                source_bam = '%s/%s.realigned.bam' % (os.path.abspath('pong_3k'), acc)
                commands = [
                    'ln -s %s %s' % (source_bam, os.path.abspath(local_bam)),
                    '%s index %s' % (samtools, os.path.abspath(local_bam)),
                    '%s view -H %s > %s' % (samtools, local_bam, ping_sam),
                    '%s view %s chr06:23519641-23528981 >> %s' % (samtools, local_bam, ping_sam),
                    '%s view -Sb %s > %s' % (samtools, ping_sam, ping_bam),
                    '%s sort -n %s -o %s' % (samtools, ping_bam, ping_sort_bam),
                    '%s bamtofastq -i %s -fq %s/%s_ping/%s_Ping_1.fq -fq2 %s/%s_ping/%s_Ping_2.fq' %
                    (bedtools, ping_sort_bam, outdir, acc, acc, outdir, acc, acc),
                ]
                ofile.write('\n'.join(commands) + '\n')
                count += 1

    if count > 0:
        # 7 = number of script lines per accession.
        runjob('ping_fastq.sh', 7)
def simulate_excision(sample):
    """Run the excision simulation ten times, summarise it and plot with R.

    Each run draws a start sample with ``sample_mPing``, passes it through
    ``evolve`` and ``valid_sample``, writes the resulting values to
    ``SimulationNNNN.gff``, calls ``distr_gff`` on that file and moves every
    ``SimulationNNNN.*`` file into the output directory.  Afterwards
    ``Sim_Sum.py`` is run on that directory, and an R script that reads the
    summary plus ``random.mRNA.5primer.distance.distr`` is written and piped
    to ``R --slave``.
    """
    sim_size = 1000        # size of subsample
    sim_run = 10           # number of runs
    sim_generation = 1000
    sample_num = len(sample.keys())  # not used below; kept from the original
    outdir = 'simulation_samplesize%s_numofrun%s' % (sim_size, sim_run)
    createdir(outdir)

    for run_index in range(sim_run):
        # Sample a start set from all somatic insertions, then evolve it.
        start_set = sample_mPing(sample, sim_size)
        evolved = evolve(start_set, sample, sim_generation)
        valid_sample(evolved)

        prefix = 'Simulation%s' % ('%04d' % (run_index))
        gff_name = '%s.gff' % (prefix)
        writefile('\n'.join(evolved.values()), gff_name)
        distr_gff(gff_name, prefix)
        os.system('mv %s.* %s' % (prefix, outdir))

    os.system('python Sim_Sum.py --input %s --output %s_results' % (outdir, outdir))
    distr_file = '%s_results.mRNA.5primer.distance.distr' % (outdir)

    # The single %s in the template receives distr_file.
    R_cmd = '''
error.bar <- function(x, y, upper, lower=upper, color,length=0.06,...){
    if(length(x) != length(y) | length(y) !=length(lower) | length(lower) != length(upper))
    stop("vectors must be same length")
    arrows(x,y+upper, x, y-lower, col=color,angle=90, code=3, length=length, ...)
}

pdf("mping_intergenic_5distance_withsim.pdf")

par(mar=c(6,4,4,2), cex=1.2)
som5 <- read.table("random.mRNA.5primer.distance.distr")
#str5 <- read.table("../mPing_distr/Strains.mRNA.5primer.distance.distr")
#ril5 <- read.table("../mPing_distr/RIL.mRNA.5primer.distance.distr")
sim5 <- read.table("%s")
som5 <- som5[-1,]
#str5 <- str5[-1,]
#ril5 <- ril5[-1,]
sim5 <- sim5[-1,]
som5 <- som5[-length(som5[,1]),]
#str5 <- str5[-length(str5[,1]),]
#ril5 <- ril5[-length(ril5[,1]),]
sim5 <- sim5[-length(sim5[,1]),]
plot(rev(som5[,4]), type='b', pch= 1,lwd = 2 , col="aquamarine3", xaxt='n', frame.plot = FALSE, ylim=c(0,0.2), ylab="Proportion", xlab="")
#lines(rev(ril5[,4]), type='b',pch= 2,lwd = 2 , col="steelblue2")
#lines(rev(str5[,4]), type='b',pch= 3,lwd = 2 , col="sandybrown")
lines(rev(sim5[,4]), type='b',pch= 20, cex=0.2,lwd = 2 , col="dim gray")
error.bar(1:length(sim5[,4]), rev(sim5[,4]), rev(sim5[,7]-sim5[,4]), rev(sim5[,7]-sim5[,4]), 'dim gray')
#yaxis <- seq(1:length(som5[,1])+0.5
axis(1,seq(1:length(som5[,1])),line=0, labels=rep("",length(som5[,1])))
text(seq(1:length(som5[,1][-1]))+0.5,rep(-0.02,7), cex=1, offset=2,labels=rev(som5[,1]*500/-1000)[-1],srt=55,xpd=TRUE)
legend('topright', bty='n', border='NA', lty= c(1,2,3,4), pch = c(1,2,3,20), cex=1 , lwd = 2 ,col=c("aquamarine3", "steelblue2", "sandybrown", "dim gray"), c("Somatic", "RIL", "Strains", "Simulation"))
mtext("Distance to TSS (kp)", side=1,cex=1.2, at=9,line=3)

dev.off()
''' % (distr_file)
    writefile(R_cmd, 'mping_intergenic_5distance_withsim.R')
    os.system('cat mping_intergenic_5distance_withsim.R | R --slave')
def _write_mping_flank_bed(bedtools, gff, label):
    """Write ``<label>.mPing_5kb_flank.bed`` (GFF records +/-5000 bp) and return its absolute path."""
    os.system(
        '%s slop -i %s -g /rhome/cjinfeng/BigData/00.RD/seqlib/MSU7.chrlen -b 5000 | awk \'{print $1"\t"$4"\t"$5}\' > %s.mPing_5kb_flank.bed'
        % (bedtools, gff, label))
    return os.path.abspath('%s.mPing_5kb_flank.bed' % (label))


def main():
    """Prepare sub-BAM extraction commands and IGV batch scripts for one strain.

    Command-line options:
        -s/--strain  strain name (required); selects ``../input/<strain>.hom.gff``
                     and ``../input/<strain>/*.bam``
        -c/--cross   use the paired strain's ``.unique.gff`` regions instead
                     (pairs: HEG4<->EG4, A123<->A119)
        -o/--output  accepted but not used by this function
        -v           sets ``args.verbose``; not used by this function

    Outputs:
        * ``<strain>.mPing_5kb_flank.bed`` (and the paired strain's when
          ``--cross`` is given),
        * ``<strain>_subbam.sh`` with one ``samtools view`` and one
          ``samtools index`` command per BAM,
        * ``<strain>.mPing_5kb_flank_igv/<strain>.<n>.igv`` batch scripts, each
          loading up to four BAMs and taking one snapshot per GFF locus.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--strain')
    parser.add_argument('-c', '--cross', action='store_true')
    parser.add_argument('-o', '--output')
    parser.add_argument('-v', dest='verbose', action='store_true')
    args = parser.parse_args()

    # A missing *or empty* --strain is a usage error.  The previous
    # ``try: len(...) except:`` form let an empty string through.
    if not args.strain:
        usage()
        sys.exit(2)

    pairs = {'HEG4': 'EG4', 'EG4': 'HEG4', 'A123': 'A119', 'A119': 'A123'}
    if args.cross and args.strain not in pairs:
        # Fail with a readable message instead of a KeyError further down.
        sys.stderr.write('--cross is only defined for: %s\n' %
                         (', '.join(sorted(pairs))))
        sys.exit(2)

    bedtools = '/opt/bedtools/2.17.0-25-g7b42b3b/bin/bedtools'
    samtools = '/opt/linux/centos/7.x/x86_64/pkgs/samtools/1.2/bin/samtools'

    # Regions to extract: the strain's own loci, or the partner's with --cross.
    # The strain's own BED is written in both cases.
    gff = '../input/%s.hom.gff' % (args.strain)
    mping_regs = _write_mping_flank_bed(bedtools, gff, args.strain)
    if args.cross:
        gff = '../input/%s.unique.gff' % (pairs[args.strain])
        mping_regs = _write_mping_flank_bed(bedtools, gff, pairs[args.strain])

    bams = glob.glob('../input/%s/*.bam' % (args.strain))

    # Output directories.
    outdir_bam = os.path.abspath('%s.mPing_5kb_flank_bam' % (args.strain))
    createdir(outdir_bam)
    outdir_igv = os.path.abspath('%s.mPing_5kb_flank_igv' % (args.strain))
    createdir(outdir_igv)

    mpings = read_gff(gff)

    # Paths as seen from the MacBook on which IGV runs.
    igv_root = '/Users/jinfengchen/biocluster/IGV_2.3.0/igv_snapshot_batch/Landrace'
    igv_snapshot_dir = '%s/%s_snapshot' % (igv_root, args.strain)
    igv_bam_dir = '%s/%s_bam' % (igv_root, args.strain)
    gff_owner = pairs[args.strain] if args.cross else args.strain
    mping_gff = '%s/%s.hom.gff' % (igv_root, gff_owner)

    cmd = []
    batch_size = 4
    # ``range(..., batch_size)`` with enumerate gives an integer batch number
    # on both Python 2 and 3 (``i / 4`` is a float on Python 3).
    for index, start in enumerate(range(0, len(bams), batch_size)):
        igv_path = '%s/%s.%s.igv' % (outdir_igv, args.strain, index)
        # One handle at a time: each batch file is closed as soon as it is done.
        with open(igv_path, 'w') as ofile:
            ofile.write('new\n')
            ofile.write('snapshotDirectory %s\n' % (igv_snapshot_dir))
            ofile.write('load %s\n' % (mping_gff))

            prefix = None
            for bam in bams[start:start + batch_size]:
                bam = os.path.abspath(bam)
                # Replace only the trailing ".bam" extension; an unescaped,
                # unanchored pattern would also rewrite e.g. "_bam" inside the name.
                prefix = re.sub(r'\.bam$', '.mPing_5kb_flank', os.path.split(bam)[1])

                # mPing regions.
                cmd.append('%s view -hb -L %s %s > %s/%s.bam' %
                           (samtools, mping_regs, bam, outdir_bam, prefix))
                cmd.append('%s index %s/%s.bam' % (samtools, outdir_bam, prefix))
                ofile.write('load %s/%s.bam\n' % (igv_bam_dir, prefix))

            # Snapshots are named after the last BAM loaded in this batch.
            for chro in sorted(mpings.keys(), key=int):
                for pos in sorted(mpings[chro].keys(), key=int):
                    mping = 'Chr%s_%s' % (chro, pos)
                    ofile.write('goto %s\n' % (mpings[chro][pos][1]))
                    ofile.write('snapshot %s.%s.%s.png\n' %
                                (mping, prefix, mpings[chro][pos][0]))

    with open('%s_subbam.sh' % (args.strain), 'w') as ofile:
        ofile.write('\n'.join(cmd) + '\n')