예제 #1
0
def main():
    """Collect per-strain TE insertion GFFs and print pairwise overlap counts.

    Command line:
        -i/--input   directory searched with
                     ``<input>/*/repeat/results/*.high_conf.gff`` (required)
        -o/--output  parsed but not used by this function
        -v           parsed but not used by this function

    Steps:
        1. Every matching GFF is filtered with ``grep -v "ping"`` into
           ``gff/<accession>.gff``; the accession comes from ``name2acc``
           applied to ``rice_line_IRRI_2466.download.list``.
        2. For every ordered pair of files in ``gff/``,
           ``bedtools window -w 100`` is run and one tab-separated line is
           printed: strain1, strain2, lines only in strain1, lines only in
           strain2, overlapping lines.

    Exits with status 2 (after calling ``usage()``) when --input is missing
    or empty.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input')
    parser.add_argument('-o', '--output')
    parser.add_argument('-v', dest='verbose', action='store_true')
    args = parser.parse_args()
    # The previous check evaluated ``len(args.input) > 0`` and discarded the
    # result, so only a missing option (None) was rejected; an empty string
    # is now treated as a usage error as well.
    if not args.input:
        usage()
        sys.exit(2)

    name_dict = name2acc('rice_line_IRRI_2466.download.list')

    # Example of an input path:
    # HEG4_group_RelocaTEi/ERS467761_RelocaTEi/repeat/results/ALL.all_nonref_insert.high_conf.gff
    # The directory four levels up from the file carries the sample name.
    sources = glob.glob('%s/*/repeat/results/*.high_conf.gff' % (args.input))
    createdir('gff')
    for gff in sources:
        dirs = re.split(r'/', gff)
        name = re.sub(r'_RelocaTEi', r'', dirs[-4])
        os.system('grep -v "ping" %s > gff/%s.gff' %
                  (os.path.abspath(gff), name_dict[name]))

    # Pairwise difference of TE insertions between all filtered GFFs.
    gffs = glob.glob('gff/*.gff')
    strains = [os.path.splitext(os.path.split(path)[1])[0] for path in gffs]
    # NOTE(review): assumes line_num() is a pure line counter, so counting
    # each GFF once up front is equivalent to recounting it for every pair.
    totals = [line_num(path) for path in gffs]

    data = defaultdict(dict)
    for i, gff_a in enumerate(gffs):
        for j, gff_b in enumerate(gffs):
            if i == j:
                # Self comparisons were never reported; skip the bedtools run.
                continue
            os.system('bedtools window -w 100 -a %s -b %s > temp.overlap' %
                      (gff_a, gff_b))
            shared = line_num('temp.overlap')
            data[strains[i]][strains[j]] = [
                totals[i] - shared, totals[j] - shared, shared
            ]

    for s1 in sorted(data):
        for s2 in sorted(data[s1]):
            only1, only2, shared = data[s1][s2]
            print('%s\t%s\t%s\t%s\t%s' % (s1, s2, only1, only2, shared))
예제 #2
0
def main():
    """Annotate every CSV in a directory using a GFF genotype table and Ping codes.

    Command line:
        -c/--csv        directory containing ``*.csv`` files (required)
        -g/--gff        GFF passed to ``readgff``
        -p/--ping_code  file passed to ``read_ping``
        -o/--output     output directory, default
                        ``mPing_boundary_mPing_GT_Ping_code``
        -v              parsed but not used by this function

    Exits with status 2 (after calling ``usage()``) when --csv is missing or
    empty.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--csv')
    parser.add_argument('-g', '--gff')
    parser.add_argument('-p', '--ping_code')
    parser.add_argument('-o', '--output')
    parser.add_argument('-v', dest='verbose', action='store_true')
    args = parser.parse_args()
    # The old ``try: len(args.csv) > 0`` threw the comparison away and only
    # caught a missing option; reject empty values explicitly instead.
    if not args.csv:
        usage()
        sys.exit(2)

    # Output directory.
    if not args.output:
        args.output = 'mPing_boundary_mPing_GT_Ping_code'
    createdir(args.output)

    # Original GFF, used to decide whether an insertion is shared or
    # reference only.
    locus_gt = readgff(args.gff)

    # Ping code table.
    ping_code = read_ping(args.ping_code)

    # Process every CSV; ``csv_path`` avoids shadowing the stdlib ``csv`` name.
    for csv_path in glob.glob('%s/*.csv' % (args.csv)):
        read_csv(csv_path, ping_code, locus_gt, args.output)
def main():
    """Annotate every CSV in a directory with Ping codes.

    Command line:
        -c/--csv        directory containing ``*.csv`` files (required)
        -p/--ping_code  file passed to ``read_ping``
        -o/--output     output directory, default
                        ``mPing_boundary_mPing_GT_Ping_code``
        -v              parsed but not used by this function

    Exits with status 2 (after calling ``usage()``) when --csv is missing or
    empty.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--csv')
    parser.add_argument('-p', '--ping_code')
    parser.add_argument('-o', '--output')
    parser.add_argument('-v', dest='verbose', action='store_true')
    args = parser.parse_args()
    # Explicit check: the former ``len(args.csv) > 0`` inside a bare
    # ``except`` ignored its own result and let an empty value through.
    if not args.csv:
        usage()
        sys.exit(2)

    # Output directory.
    if not args.output:
        args.output = 'mPing_boundary_mPing_GT_Ping_code'
    createdir(args.output)

    # Ping code table.
    ping_code = read_ping(args.ping_code)

    # Process every CSV; ``csv_path`` avoids shadowing the stdlib ``csv`` name.
    for csv_path in glob.glob('%s/*.csv' % (args.csv)):
        read_csv(csv_path, ping_code, args.output)
def main():
    """Run ``read_csv`` on each ``*.csv`` under --csv using the Ping code table.

    Command line:
        -c/--csv        directory containing ``*.csv`` files (required)
        -p/--ping_code  file passed to ``read_ping``
        -o/--output     output directory, default
                        ``mPing_boundary_mPing_GT_Ping_code``
        -v              parsed but not used by this function

    Exits with status 2 (after calling ``usage()``) when --csv is missing or
    empty.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--csv')
    parser.add_argument('-p', '--ping_code')
    parser.add_argument('-o', '--output')
    parser.add_argument('-v', dest='verbose', action='store_true')
    args = parser.parse_args()
    # A bare ``except`` around a discarded ``len(...) > 0`` used to stand in
    # for validation; test the value directly so '' is rejected too.
    if not args.csv:
        usage()
        sys.exit(2)

    # Output directory.
    if not args.output:
        args.output = 'mPing_boundary_mPing_GT_Ping_code'
    createdir(args.output)

    # Ping code table.
    ping_code = read_ping(args.ping_code)

    # Process every CSV; ``csv_path`` avoids shadowing the stdlib ``csv`` name.
    for csv_path in glob.glob('%s/*.csv' % (args.csv)):
        read_csv(csv_path, ping_code, args.output)
def pong_3k(infile):
    """Write ``pong_fastq.sh``: download-and-extract commands per accession.

    ``infile`` is a tab-separated table whose first column is the accession.
    Lines of two characters or fewer and lines starting with ``Taxa`` are
    skipped. For every accession whose ``pong_3k/<acc>.pong.byname.bam`` is
    missing or smaller than 10000 bytes, the accession is printed and the
    following commands are appended to the script: wget of the realigned BAM,
    samtools index, header dump, one ``samtools view`` per region, SAM->BAM
    conversion, name sort and ``bedtools bamtofastq``.

    The script is only written, not executed, by this function.
    """
    samtools = '/opt/linux/centos/7.x/x86_64/pkgs/samtools/1.3/bin/samtools'
    bedtools = '/rhome/cjinfeng/BigData/software/bedtools2-2.19.0/bin/bedtools'
    # Intervals pulled out of each BAM (presumably the Pong loci -- confirm).
    regions = (
        'chr11:11434715-11443880',
        'chr02:19902309-19911474',
        'chr06:12413640-12427974',
        'chr06:21718706-21727871',
        'chr09:11300206-11309371',
    )
    outdir = os.path.abspath('pong_3k')
    # Context managers close both files even if a line fails to parse; the
    # previous explicit close() was skipped on any exception.
    with open('pong_fastq.sh', 'w') as ofile:
        with open(infile, 'r') as filehd:
            for line in filehd:
                line = line.rstrip()
                if len(line) <= 2 or line.startswith(r'Taxa'):
                    continue
                acc = re.split(r'\t', line)[0]
                remote_bam = 'http://s3.amazonaws.com/3kricegenome/Nipponbare/%s.realigned.bam' % (
                    acc)
                createdir(outdir)
                createdir('%s/%s_pong' % (outdir, acc))
                local_bam = '%s/%s.realigned.bam' % (outdir, acc)
                pong_sam = '%s/%s.pong.sam' % (outdir, acc)
                pong_bam = '%s/%s.pong.bam' % (outdir, acc)
                pong_sort_bam = '%s/%s.pong.byname.bam' % (outdir, acc)
                # A sorted BAM of at least 10000 bytes counts as already done.
                if (os.path.exists(pong_sort_bam)
                        and int(os.path.getsize(pong_sort_bam)) >= 10000):
                    continue
                print(acc)
                commands = [
                    'wget %s -O %s' % (remote_bam, os.path.abspath(local_bam)),
                    '%s index %s' % (samtools, os.path.abspath(local_bam)),
                    '%s view -H %s > %s' % (samtools, local_bam, pong_sam),
                ]
                for region in regions:
                    commands.append('%s view %s %s >> %s' %
                                    (samtools, local_bam, region, pong_sam))
                commands.append('%s view -Sb %s > %s' %
                                (samtools, pong_sam, pong_bam))
                commands.append('%s sort -n %s -o %s' %
                                (samtools, pong_bam, pong_sort_bam))
                commands.append(
                    '%s bamtofastq -i %s -fq %s/%s_pong/%s_Pong_1.fq -fq2 %s/%s_pong/%s_Pong_2.fq'
                    % (bedtools, pong_sort_bam, outdir, acc, acc, outdir, acc,
                       acc))
                ofile.write('\n'.join(commands) + '\n')
예제 #6
0
def update_RIL(infile, illumina, genotype, raw, bam_dir):
    """Symlink raw FASTQ/BAM files under RIL-based names.

    ``infile`` is a tab-separated table; lines of two characters or fewer and
    lines starting with ``#`` are ignored. Columns used: 0 and 1 (combined as
    ``FC<col0>L<col1>``), 2 (raw file stem), 4 and 5 (RIL id, prefixed with
    ``RIL`` when it does not already start with it).

    For every row: ``<illumina>/<ril>`` is created, missing raw FASTQs are
    reported, the raw FASTQs are linked inside ``raw`` as
    ``<prefix>_p1.fq``/``_p2.fq``, those are linked into
    ``<illumina>/<ril>/``, and ``<bam_dir>/<prefix>.*`` is linked into
    ``genotype`` when ``<genotype>/<prefix>.recal.bai`` is absent.

    Returns an empty ``defaultdict(str)``; nothing is ever stored in it.
    """
    data = defaultdict(str)
    with open(infile, 'r') as handle:
        for record in handle:
            record = record.rstrip()
            if len(record) <= 2 or record.startswith(r'#'):
                continue
            unit = re.split(r'\t', record)
            ril = unit[5]
            if not ril.startswith(r'RIL'):
                ril = 'RIL%s' % (ril)
            createdir('%s/%s' % (illumina, ril))

            # Raw FASTQ names: flowcell-style stems carry the mate as an
            # extra "pairN" field, all other stems use _R1/_R2 suffixes.
            if unit[2].startswith(r'flowcell'):
                word = re.split(r'\_', unit[2])
                raw_reads = [
                    '%s/%s.fastq' % (raw, '_'.join([word[0], word[1], 'pair1', word[2]])),
                    '%s/%s.fastq' % (raw, '_'.join([word[0], word[1], 'pair2', word[2]])),
                ]
            else:
                stem = unit[2].rstrip()
                raw_reads = [
                    '%s/%s_R1.fastq' % (raw, stem),
                    '%s/%s_R2.fastq' % (raw, stem),
                ]
            for path in raw_reads:
                if not os.path.isfile(path):
                    print('file not found: %s' % (path))

            # FASTQs linked next to the raw files under RIL id and barcode.
            prefix = '%s_%s_FC%sL%s' % (ril, unit[4], unit[0], unit[1])
            named_reads = [
                '%s/%s_p1.fq' % (raw, prefix),
                '%s/%s_p2.fq' % (raw, prefix),
            ]
            for source, link in zip(raw_reads, named_reads):
                if not os.path.isfile(link):
                    os.system('ln -s %s %s' % (source, link))

            # Per-RIL copies under the illumina directory.
            ril_reads = [
                '%s/%s/%s_p1.fq' % (illumina, ril, prefix),
                '%s/%s/%s_p2.fq' % (illumina, ril, prefix),
            ]
            for source, link in zip(named_reads, ril_reads):
                if not os.path.isfile(link):
                    print(link)
                    os.system('ln -s %s %s' % (source, link))

            # BAM and companions: presence of the .recal.bai in ``genotype``
            # decides whether the whole <prefix>.* set is linked there.
            bam_target = '%s/%s.recal.bai' % (genotype, prefix)
            if not os.path.isfile(bam_target):
                print(bam_target)
                bam_all = '%s/%s.*' % (bam_dir, prefix)
                bam_all_target = '%s' % (genotype)
                os.system('ln -s %s %s' % (bam_all, bam_all_target))
    return data
예제 #7
0
def ping_3k(infile):
    """Write ``ping_fastq.sh`` and submit it when it contains any commands.

    ``infile`` is a tab-separated table whose first column is the accession.
    Lines of two characters or fewer and lines starting with ``Taxa`` are
    skipped. For every accession whose ``ping_3k/<acc>.ping.byname.bam`` is
    missing or empty, five commands are appended to the script: header dump,
    ``samtools view`` of chr06:23519641-23528981 (both read straight from the
    remote BAM URL), SAM->BAM conversion, name sort and
    ``bedtools bamtofastq``.

    When at least one accession was queued, ``runjob('ping_fastq.sh', 5)`` is
    called after the script has been closed.
    """
    samtools = '/opt/linux/centos/7.x/x86_64/pkgs/samtools/1.3/bin/samtools'
    bedtools = '/rhome/cjinfeng/BigData/software/bedtools2-2.19.0/bin/bedtools'
    outdir = os.path.abspath('ping_3k')
    count = 0
    # ``with`` guarantees the script is flushed and closed before runjob()
    # and is not leaked if a row raises.
    with open('ping_fastq.sh', 'w') as ofile:
        with open(infile, 'r') as filehd:
            for line in filehd:
                line = line.rstrip()
                if len(line) <= 2 or line.startswith(r'Taxa'):
                    continue
                acc = re.split(r'\t', line)[0]
                bam = 'http://s3.amazonaws.com/3kricegenome/Nipponbare/%s.realigned.bam' % (
                    acc)
                createdir(outdir)
                createdir('%s/%s_ping' % (outdir, acc))
                ping_sam = '%s/%s.ping.sam' % (outdir, acc)
                ping_bam = '%s/%s.ping.bam' % (outdir, acc)
                ping_sort_bam = '%s/%s.ping.byname.bam' % (outdir, acc)
                # A non-empty sorted BAM counts as already done.
                if (os.path.exists(ping_sort_bam)
                        and int(os.path.getsize(ping_sort_bam)) != 0):
                    continue
                commands = [
                    '%s view -H %s > %s' % (samtools, bam, ping_sam),
                    '%s view %s chr06:23519641-23528981 >> %s' %
                    (samtools, bam, ping_sam),
                    '%s view -Sb %s > %s' % (samtools, ping_sam, ping_bam),
                    '%s sort -n %s -o %s' % (samtools, ping_bam,
                                             ping_sort_bam),
                    '%s bamtofastq -i %s -fq %s/%s_ping/%s_Ping_1.fq -fq2 %s/%s_ping/%s_Ping_2.fq'
                    % (bedtools, ping_sort_bam, outdir, acc, acc, outdir, acc,
                       acc),
                ]
                ofile.write('\n'.join(commands) + '\n')
                count += 1
    if count > 0:
        # Second argument matches the number of script lines per accession.
        runjob('ping_fastq.sh', 5)
def pong_3k(infile):
    """Write ``pong_fastq.sh`` and submit it when it contains any commands.

    ``infile`` is a tab-separated table whose first column is the accession.
    Lines of two characters or fewer and lines starting with ``Taxa`` are
    skipped. For every accession whose ``pong_3k/<acc>.pong.byname.bam`` is
    missing or empty, nine commands are appended to the script: header dump
    and five region extractions (all read straight from the remote BAM URL),
    SAM->BAM conversion, name sort and ``bedtools bamtofastq``.

    When at least one accession was queued, ``runjob('pong_fastq.sh', 9)`` is
    called after the script has been closed.
    """
    samtools = '/opt/linux/centos/7.x/x86_64/pkgs/samtools/1.3/bin/samtools'
    bedtools = '/rhome/cjinfeng/BigData/software/bedtools2-2.19.0/bin/bedtools'
    # Intervals pulled out of each BAM (presumably the Pong loci -- confirm).
    regions = (
        'chr11:11434715-11443880',
        'chr02:19902309-19911474',
        'chr06:12413640-12427974',
        'chr06:21718706-21727871',
        'chr09:11300206-11309371',
    )
    outdir = os.path.abspath('pong_3k')
    count = 0
    # ``with`` guarantees the script is flushed and closed before runjob()
    # and is not leaked if a row raises.
    with open('pong_fastq.sh', 'w') as ofile:
        with open(infile, 'r') as filehd:
            for line in filehd:
                line = line.rstrip()
                if len(line) <= 2 or line.startswith(r'Taxa'):
                    continue
                acc = re.split(r'\t', line)[0]
                bam = 'http://s3.amazonaws.com/3kricegenome/Nipponbare/%s.realigned.bam' % (acc)
                createdir(outdir)
                createdir('%s/%s_pong' % (outdir, acc))
                pong_sam = '%s/%s.pong.sam' % (outdir, acc)
                pong_bam = '%s/%s.pong.bam' % (outdir, acc)
                pong_sort_bam = '%s/%s.pong.byname.bam' % (outdir, acc)
                # A non-empty sorted BAM counts as already done.
                if (os.path.exists(pong_sort_bam)
                        and int(os.path.getsize(pong_sort_bam)) != 0):
                    continue
                commands = ['%s view -H %s > %s' % (samtools, bam, pong_sam)]
                for region in regions:
                    commands.append('%s view %s %s >> %s' %
                                    (samtools, bam, region, pong_sam))
                commands.append('%s view -Sb %s > %s' %
                                (samtools, pong_sam, pong_bam))
                commands.append('%s sort -n %s -o %s' %
                                (samtools, pong_bam, pong_sort_bam))
                commands.append(
                    '%s bamtofastq -i %s -fq %s/%s_pong/%s_Pong_1.fq -fq2 %s/%s_pong/%s_Pong_2.fq'
                    % (bedtools, pong_sort_bam, outdir, acc, acc, outdir, acc,
                       acc))
                ofile.write('\n'.join(commands) + '\n')
                count += 1
    if count > 0:
        # Second argument matches the number of script lines per accession.
        runjob('pong_fastq.sh', 9)
def pong_3k(infile):
    """Write ``ping_fastq.sh`` (ping extraction from local BAMs) and submit it.

    Despite its name this variant works on the ``ping_3k`` directory.
    ``infile`` is a tab-separated table whose first column is the accession.
    Lines of two characters or fewer and lines starting with ``Taxa`` are
    skipped. For every accession whose ``ping_3k/<acc>.ping.byname.bam`` is
    missing or smaller than 2000 bytes, the accession and the current size
    are printed and seven commands are appended to the script: symlink of
    ``pong_3k/<acc>.realigned.bam`` into ``ping_3k``, samtools index, header
    dump, ``samtools view`` of chr06:23519641-23528981, SAM->BAM conversion,
    name sort and ``bedtools bamtofastq``.

    When at least one accession was queued, ``runjob('ping_fastq.sh', 7)`` is
    called after the script has been closed.
    """
    samtools = '/opt/linux/centos/7.x/x86_64/pkgs/samtools/1.3/bin/samtools'
    bedtools = '/rhome/cjinfeng/BigData/software/bedtools2-2.19.0/bin/bedtools'
    outdir = os.path.abspath('ping_3k')
    count = 0
    # ``with`` guarantees the script is flushed and closed before runjob()
    # and is not leaked if a row raises.
    with open('ping_fastq.sh', 'w') as ofile:
        with open(infile, 'r') as filehd:
            for line in filehd:
                line = line.rstrip()
                if len(line) <= 2 or line.startswith(r'Taxa'):
                    continue
                acc = re.split(r'\t', line)[0]
                createdir(outdir)
                createdir('%s/%s_ping' % (outdir, acc))
                local_bam = '%s/%s.realigned.bam' % (outdir, acc)
                ping_sam = '%s/%s.ping.sam' % (outdir, acc)
                ping_bam = '%s/%s.ping.bam' % (outdir, acc)
                ping_sort_bam = '%s/%s.ping.byname.bam' % (outdir, acc)
                # Treat a missing result as size 0. The old code called
                # os.path.getsize() in the progress print even when the file
                # did not exist, which raised OSError for exactly the
                # accessions that still needed processing.
                if os.path.exists(ping_sort_bam):
                    size = int(os.path.getsize(ping_sort_bam))
                else:
                    size = 0
                if size >= 2000:
                    continue
                print('%s %s' % (acc, size))
                source_bam = '%s/%s.realigned.bam' % (
                    os.path.abspath('pong_3k'), acc)
                commands = [
                    'ln -s %s %s' % (source_bam, os.path.abspath(local_bam)),
                    '%s index %s' % (samtools, os.path.abspath(local_bam)),
                    '%s view -H %s > %s' % (samtools, local_bam, ping_sam),
                    '%s view %s chr06:23519641-23528981 >> %s' %
                    (samtools, local_bam, ping_sam),
                    '%s view -Sb %s > %s' % (samtools, ping_sam, ping_bam),
                    '%s sort -n %s -o %s' % (samtools, ping_bam,
                                             ping_sort_bam),
                    '%s bamtofastq -i %s -fq %s/%s_ping/%s_Ping_1.fq -fq2 %s/%s_ping/%s_Ping_2.fq'
                    % (bedtools, ping_sort_bam, outdir, acc, acc, outdir, acc,
                       acc),
                ]
                ofile.write('\n'.join(commands) + '\n')
                count += 1
    if count > 0:
        # Second argument matches the number of script lines per accession.
        runjob('ping_fastq.sh', 7)
예제 #10
0
def simulate_excision(sample):
    """Run repeated simulations from ``sample`` and plot them with R.

    ``sample`` must provide ``keys()``; it is handed unchanged to
    ``sample_mPing`` and ``evolve``.

    For each of ``sim_run`` runs: ``sample_mPing`` draws ``sim_size`` entries,
    ``evolve`` is applied with ``sim_generation``, the result is checked by
    ``valid_sample``, its values are written to ``SimulationNNNN.gff``,
    ``distr_gff`` is called on that file, and every ``SimulationNNNN.*`` file
    is moved into the output directory.

    Afterwards ``Sim_Sum.py`` is run on the output directory, and an R script
    (``mping_intergenic_5distance_withsim.R``) is written and executed. The
    script reads ``random.mRNA.5primer.distance.distr`` from the current
    directory plus the summary produced by ``Sim_Sum.py`` and writes
    ``mping_intergenic_5distance_withsim.pdf``.

    Returns None; all results are files on disk.
    """
    sim_size = 1000  #size of subsample
    sim_run = 10  #number of run
    sim_generation = 1000
    # NOTE(review): sample_num is only referenced by the commented-out
    # sampling code below; it has no effect on the current run.
    sample_num = len(sample.keys())
    outdir = 'simulation_samplesize%s_numofrun%s' % (sim_size, sim_run)
    createdir(outdir)
    for r in range(sim_run):
        #sample a start sample from all somatic insertions
        samples = sample_mPing(sample, sim_size)
        #for n in range(sim_size):
        #    rn = random.randint(1, sample_num)
        #    print 'run%s\tsample%s\trn:%s' %(r, n, rn)
        #    samples.append(sample[int(rn)])
        samples_e = evolve(samples, sample, sim_generation)
        valid_sample(samples_e)
        # Zero-padded run id, e.g. Simulation0003.
        prefix = 'Simulation%s' % ('%04d' % (r))
        writefile('\n'.join(samples_e.values()), '%s.gff' % (prefix))
        distr_gff('%s.gff' % (prefix), prefix)
        # Relies on shell globbing to collect every file derived from this run.
        os.system('mv %s.* %s' % (prefix, outdir))
    # Summarise all runs; the R script below reads
    # <outdir>_results.mRNA.5primer.distance.distr, presumably written by
    # Sim_Sum.py -- confirm against that script.
    os.system('python Sim_Sum.py --input %s --output %s_results' %
              (outdir, outdir))
    distr_file = '%s_results.mRNA.5primer.distance.distr' % (outdir)
    # NOTE(review): the legend in the R script lists four series (Somatic,
    # RIL, Strains, Simulation) while only the somatic and simulation curves
    # are drawn; the RIL/Strains lines are commented out inside the script.
    # The single %s placeholder in the script receives distr_file.
    R_cmd = '''
error.bar <- function(x, y, upper, lower=upper, color,length=0.06,...){
     if(length(x) != length(y) | length(y) !=length(lower) | length(lower) != length(upper))
     stop("vectors must be same length")
     arrows(x,y+upper, x, y-lower, col=color,angle=90, code=3, length=length, ...)
 }

pdf("mping_intergenic_5distance_withsim.pdf")

par(mar=c(6,4,4,2), cex=1.2)
som5 <- read.table("random.mRNA.5primer.distance.distr")
#str5 <- read.table("../mPing_distr/Strains.mRNA.5primer.distance.distr")
#ril5 <- read.table("../mPing_distr/RIL.mRNA.5primer.distance.distr")
sim5 <- read.table("%s")

som5 <- som5[-1,]
#str5 <- str5[-1,]
#ril5 <- ril5[-1,]
sim5 <- sim5[-1,]

som5 <- som5[-length(som5[,1]),]
#str5 <- str5[-length(str5[,1]),]
#ril5 <- ril5[-length(ril5[,1]),]
sim5 <- sim5[-length(sim5[,1]),]

plot(rev(som5[,4]), type='b', pch= 1,lwd = 2 , col="aquamarine3", xaxt='n', frame.plot = FALSE, ylim=c(0,0.2), ylab="Proportion", xlab="")
#lines(rev(ril5[,4]), type='b',pch= 2,lwd = 2 , col="steelblue2")
#lines(rev(str5[,4]), type='b',pch= 3,lwd = 2 , col="sandybrown")
lines(rev(sim5[,4]), type='b',pch= 20, cex=0.2,lwd = 2 , col="dim gray")
error.bar(1:length(sim5[,4]), rev(sim5[,4]), rev(sim5[,7]-sim5[,4]), rev(sim5[,7]-sim5[,4]), 'dim gray')

#yaxis <- seq(1:length(som5[,1])+0.5
axis(1,seq(1:length(som5[,1])),line=0, labels=rep("",length(som5[,1])))
text(seq(1:length(som5[,1][-1]))+0.5,rep(-0.02,7), cex=1, offset=2,labels=rev(som5[,1]*500/-1000)[-1],srt=55,xpd=TRUE)

legend('topright', bty='n', border='NA', lty= c(1,2,3,4), pch = c(1,2,3,20), cex=1 , lwd = 2 ,col=c("aquamarine3", "steelblue2", "sandybrown", "dim gray"), c("Somatic", "RIL", "Strains", "Simulation"))
mtext("Distance to TSS (kp)", side=1,cex=1.2, at=9,line=3)

dev.off()
''' % (distr_file)
    writefile(R_cmd, 'mping_intergenic_5distance_withsim.R')
    # Feed the script to R on stdin; --slave suppresses R's startup output.
    os.system('cat mping_intergenic_5distance_withsim.R | R --slave')
예제 #11
0
def main():
    """Prepare mPing-flank sub-BAM commands and IGV batch scripts for a strain.

    Command line:
        -s/--strain  strain name (required)
        -c/--cross   use the partner strain's ``.unique.gff`` (partners:
                     HEG4<->EG4, A123<->A119) instead of the strain's own
                     ``.hom.gff``
        -o/--output  parsed but not used by this function
        -v           parsed but not used by this function

    Outputs:
        * ``<name>.mPing_5kb_flank.bed`` via ``bedtools slop -b 5000``
        * ``<strain>_subbam.sh`` with one ``samtools view -L`` and one
          ``samtools index`` command per BAM found in ``../input/<strain>/``
        * ``<strain>.mPing_5kb_flank_igv/<strain>.<n>.igv``: one IGV batch
          file per group of four BAMs, ending with a goto/snapshot pair for
          every locus returned by ``read_gff``

    Exits with status 2 when --strain is missing or empty, or when --cross is
    requested for a strain without a known partner.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--strain')
    parser.add_argument('-c', '--cross', action='store_true')
    parser.add_argument('-o', '--output')
    parser.add_argument('-v', dest='verbose', action='store_true')
    args = parser.parse_args()
    # The old ``try: len(args.strain) > 0`` discarded the comparison and only
    # caught a missing option; reject empty values explicitly.
    if not args.strain:
        usage()
        sys.exit(2)

    pairs = {'HEG4': 'EG4', 'EG4': 'HEG4', 'A123': 'A119', 'A119': 'A123'}
    # Fail with a readable message instead of a KeyError traceback.
    if args.cross and args.strain not in pairs:
        sys.stderr.write('--cross is only supported for strains: %s\n' %
                         (', '.join(sorted(pairs))))
        sys.exit(2)

    bedtools = '/opt/bedtools/2.17.0-25-g7b42b3b/bin/bedtools'
    samtools = '/opt/linux/centos/7.x/x86_64/pkgs/samtools/1.2/bin/samtools'
    # Extend every GFF feature by 5 kb and keep chromosome/start/end.
    slop_cmd = '%s slop -i %s -g /rhome/cjinfeng/BigData/00.RD/seqlib/MSU7.chrlen -b 5000 | awk \'{print $1"\t"$4"\t"$5}\' > %s.mPing_5kb_flank.bed'

    gff = '../input/%s.hom.gff' % (args.strain)
    os.system(slop_cmd % (bedtools, gff, args.strain))
    mping_regs = os.path.abspath('%s.mPing_5kb_flank.bed' % (args.strain))
    if args.cross:
        # Inspect this strain's reads at the partner's unique insertions.
        partner = pairs[args.strain]
        gff = '../input/%s.unique.gff' % (partner)
        os.system(slop_cmd % (bedtools, gff, partner))
        mping_regs = os.path.abspath('%s.mPing_5kb_flank.bed' % (partner))

    bams = glob.glob('../input/%s/*.bam' % (args.strain))

    # Output directories.
    outdir_bam = os.path.abspath('%s.mPing_5kb_flank_bam' % (args.strain))
    createdir(outdir_bam)
    outdir_igv = os.path.abspath('%s.mPing_5kb_flank_igv' % (args.strain))
    createdir(outdir_igv)

    # mPing loci; indexed as mpings[chro][pos] -> (label, IGV locus).
    mpings = read_gff(gff)

    # Paths as seen from the workstation that runs IGV.
    igv_root = '/Users/jinfengchen/biocluster/IGV_2.3.0/igv_snapshot_batch/Landrace'
    igv_snapshot_dir = '%s/%s_snapshot' % (igv_root, args.strain)
    igv_bam_dir = '%s/%s_bam' % (igv_root, args.strain)
    mping_gff = '%s/%s.hom.gff' % (igv_root, args.strain)
    if args.cross:
        mping_gff = '%s/%s.hom.gff' % (igv_root, pairs[args.strain])

    batch_size = 4  # BAMs loaded per IGV batch file
    cmd = []
    igv = None
    try:
        for i, bam in enumerate(bams):
            bam = os.path.abspath(bam)
            # Anchor and escape the extension: the former pattern r'.bam'
            # matched any character followed by "bam" anywhere in the name.
            prefix = re.sub(r'\.bam$', r'.mPing_5kb_flank',
                            os.path.split(bam)[1])

            # Sub-BAM restricted to the mPing regions, plus its index.
            cmd.append('%s view -hb -L %s %s > %s/%s.bam' %
                       (samtools, mping_regs, bam, outdir_bam, prefix))
            cmd.append('%s index %s/%s.bam' % (samtools, outdir_bam, prefix))

            # Start a new IGV batch file every ``batch_size`` BAMs; floor
            # division keeps the batch number an integer on any Python.
            if i % batch_size == 0:
                igv = open('%s/%s.%s.igv' %
                           (outdir_igv, args.strain, i // batch_size), 'w')
                igv.write('new\n')
                igv.write('snapshotDirectory %s\n' % (igv_snapshot_dir))
                igv.write('load %s\n' % (mping_gff))
            igv.write('load %s/%s.bam\n' % (igv_bam_dir, prefix))

            # Batch complete (or last BAM): add one snapshot per locus, named
            # after the last BAM of the batch, then close the file.
            if i % batch_size == batch_size - 1 or i == len(bams) - 1:
                for chro in sorted(mpings.keys(), key=int):
                    for pos in sorted(mpings[chro].keys(), key=int):
                        mping = 'Chr%s_%s' % (chro, pos)
                        igv.write('goto %s\n' % (mpings[chro][pos][1]))
                        igv.write('snapshot %s.%s.%s.png\n' %
                                  (mping, prefix, mpings[chro][pos][0]))
                igv.close()
                igv = None
    finally:
        # Do not leak a half-written batch file if anything above raised.
        if igv is not None:
            igv.close()

    with open('%s_subbam.sh' % (args.strain), 'w') as ofile:
        ofile.write('\n'.join(cmd) + '\n')