示例#1
0
def Subsampling(args):
    """
    %prog Subsampling SMs_file vcf_or_vcf.gz
    Subsampling vcf file using bcftools. The samples order will also change following the order in SMs_file.
    """
    p = OptionParser(Subsampling.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())
    SMsfile, vcffile, = args

    # output name is derived from the vcf basename (strip dirs and '.vcf' suffix)
    prefix = vcffile.split('/')[-1].split('.vcf')[0]
    new_f = prefix + '.subsm.vcf'
    cmd = "bcftools view -S %s %s > %s\n" % (SMsfile, vcffile, new_f)
    print(cmd)
    jobfile = '%s.subsm.slurm' % prefix
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix,
                             opts.prefix)
    header += 'module load bcftools\n'
    header += cmd
    # bug fix: the original left the file handle open (risking an unflushed
    # slurm script); use a context manager so it is always closed.
    with open(jobfile, 'w') as f:
        f.write(header)
    print(
        'slurm file %s.subsm.slurm has been created, you can sbatch your job file.'
        % prefix)
示例#2
0
def CombineRep(args):
    """
    %prog CombineRep dir
    combine all fq.gz files for same sample
    """
    p = OptionParser(CombineRep.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    fqs = [i for i in os.listdir(mydir) if i.endswith('fq.gz')]
    # sort by the numeric sample id parsed from the file name
    fqs = sorted(
        fqs, key=lambda x: int(x.split('.')[0].split('_')[0].split('R')[0]))
    SMs = [x.split('.')[0].split('_')[0].split('R')[0] for x in fqs]
    mydf = pd.DataFrame(dict(zip(['SM', 'FNs'], [SMs, fqs])))
    # per sample: replicate count and a space-joined list of its fq files
    mygrpdf = mydf.groupby('SM').agg(['count', lambda x: ' '.join(x)])
    # bug fix: file handle was never closed; use a context manager
    with open('combine_fqs.sh', 'w') as f:
        for sm in mygrpdf.index:
            n, fns = mygrpdf.loc[sm, :]
            cmd = 'cat %s > %s.cbd.fq.gz\n' % (fns, sm)
            f.write(cmd)
    cmd1 = 'chmod +x combine_fqs.sh\n'
    cmd2 = './combine_fqs.sh\n'
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix,
                             opts.prefix)
    header += cmd1
    header += cmd2
    # bug fix: original did open('CombineFQs.slurm' % prefix, 'w'), which
    # raises TypeError ('%' applied to a string with no placeholder) and
    # referenced an undefined name 'prefix'; the file name is fixed.
    with open('CombineFQs.slurm', 'w') as f:
        f.write(header)
    print(
        'slurm file CombineFQs.slurm has been created, you can sbatch your job file.'
    )
示例#3
0
def cMLM(args):
    """
    %prog cMLM pheno(with header, tab delimited) geno_prefix(GM and GD prefix) PCA Kinship
    
    Run automated GAPIT compressed mixed linear model
    """
    p = OptionParser(cMLM.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())

    pheno, geno_prefix, PCA, Kinship = args
    # job/file stem: phenotype file name without its extension
    mem = '.'.join(pheno.split('.')[0:-1])

    # write the GAPIT R script
    with open('%s.cMLM.R' % mem, 'w') as r_fh:
        r_fh.write(Gapit_header % (pheno, geno_prefix, geno_prefix, PCA,
                                   Kinship, mem))

    # write the slurm script that loads R and runs the script above
    slurm_tmpl = Slurm_header
    slurm_tmpl += 'module load R/3.3\n'
    with open('%s.cMLM.slurm' % mem, 'w') as s_fh:
        s_fh.write(slurm_tmpl % (opts.time, opts.memory, opts.prefix,
                                 opts.prefix, opts.prefix))
        s_fh.write('R CMD BATCH %s.cMLM.R\n' % mem)
    print('R script %s.cMLM.R and slurm file %s.cMLM.slurm has been created, you can sbatch your job file.'%(mem, mem))
示例#4
0
def plot(args):
    """
    %prog plot gwas_out result_prefix

    plt MVP results using MVP.Report function.
    https://github.com/XiaoleiLiuBio/MVP
    """
    p = OptionParser(plot.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    gwasfn, op, = args  # op: output prefix
    # bug fix: MVP_Run_header was formatted with an undefined name 'pheno'
    # (NameError at runtime); the GWAS result file is the data input.
    # Also removed the dead 'cmds' string literal that was never used.
    with open('%s.plot.R' % op, 'w') as f1:
        f1.write(MVP_Run_header % (gwasfn, op, op, op, op))
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix,
                             opts.prefix)
    header += 'module load R\n'
    # bug fix: the slurm script previously batched '%s.mlm.farmcpu.R', a
    # file this function never writes; run the R script created above.
    header += 'R CMD BATCH %s.plot.R\n' % op
    with open('%s.plot.slurm' % op, 'w') as f2:
        f2.write(header)
    print('%s.plot.R and %s.plot.slurm have been created.' % (op, op))
示例#5
0
def IndexBam(args):
    """
    %prog IndexBam dir
    create the index for bam files
    """
    p = OptionParser(IndexBam.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    # one slurm job per sorted bam found in the target directory
    bams = [fn for fn in os.listdir(mydir) if fn.endswith('sorted.bam')]
    print('Total %s sorted.bam files' % len(bams))
    for bam in bams:
        sample = bam.split('.')[0]
        body = Slurm_header % (opts.time, opts.memory, sample, sample, sample)
        body += 'module load samtools/0.1\n'
        body += 'samtools index %s\n' % bam
        with open('%s.idx.slurm' % sample, 'w') as fh:
            fh.write(body)
    print(
        'slurm files *.idx.slurm has been created, you can sbatch your job file.'
    )
示例#6
0
def genPCA(args):
    """
    %prog genPCA hmp N

    Generate first N PCs using tassel
    """
    p = OptionParser(genPCA.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmp, N, = args
    out_prefix = hmp.replace('.hmp', '')
    # TASSEL PCA plugin invocation; exports <out_prefix>_<N>PCA
    tassel_cmd = 'run_pipeline.pl -Xms28g -Xmx29g -fork1 -h %s -PrincipalComponentsPlugin -ncomponents %s -covariance true -endPlugin -export %s_%sPCA -runfork1\n' % (
        hmp, N, out_prefix, N)

    tmpl = Slurm_header
    tmpl += 'ml java/1.8\n'
    tmpl += 'ml tassel/5.2\n'
    content = tmpl % (opts.time, opts.memory, opts.prefix, opts.prefix,
                      opts.prefix)
    content += tassel_cmd
    with open('%s.PCA%s.slurm' % (out_prefix, N), 'w') as fh:
        fh.write(content)
    print(
        'slurm file %s.PCA%s.slurm has been created, you can sbatch your job file.'
        % (out_prefix, N))
示例#7
0
def IndePvalue(args):
    """
    %prog IndePvalue plink_bed_prefix output

    calculate the number of independent SNPs (Me) and the bonferroni pvalue
    """
    p = OptionParser(IndePvalue.__doc__)
    p.set_slurm_opts(jn=True)
    p.add_option(
        '--cutoff',
        default='0.05',
        choices=('0.01', '0.05'),
        help='choose the pvalue cutoff for the calculation of bonferroni pvalue'
    )
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())

    bed, output = args
    # bug fix: slurm options are typically parsed as strings, so
    # 'opts.memory / 1000' raised TypeError; convert first. Reserve 2GB
    # headroom for the JVM below the slurm allocation (MB -> GB).
    mem = int(opts.memory) // 1000 - 2
    cmd = 'java -Xmx%sg -jar %s --noweb --effect-number --plink-binary %s --genome --out %s' % (
        mem, GEC, bed, output)
    h = Slurm_header
    h += 'module load java/1.8\n'
    header = h % (opts.time, opts.memory, opts.prefix, opts.prefix,
                  opts.prefix)
    header += cmd
    # close the job file deterministically
    with open('%s.Me_SNP.slurm' % output, 'w') as f:
        f.write(header)
    print(
        'slurm file %s.Me_SNP.slurm has been created, you can sbatch your job file.'
        % output)
示例#8
0
def impute(args):
    """
    %prog impute vcf
    impute missing data in vcf using beagle or linkimpute
    """
    p = OptionParser(impute.__doc__)
    p.set_slurm_opts(jn=True)
    p.add_option('--software',
                 default='linkimpute',
                 choices=('linkimpute', 'beagle'),
                 help='specify the imputation software')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    vcffile, = args
    prefix = '.'.join(vcffile.split('.')[0:-1])
    new_f = prefix + '.impt.vcf'

    # pick the tool-specific command and java module version
    if opts.software == 'linkimpute':
        cmd = 'java -Xss100m -Xmx18G -jar %s -v %s %s \n' % (lkipt, vcffile, new_f)
        module_line = 'module load java/1.7 \n'
    else:
        cmd = 'java -Xss16G -Xmx18G -jar %s gt=%s out=%s.beagle \n' % (begle, vcffile, prefix)
        module_line = 'module load java/1.8 \n'
    # memory is pinned at 20000 here regardless of opts.memory
    content = Slurm_header % (opts.time, 20000, opts.prefix, opts.prefix,
                              opts.prefix)
    content += module_line
    content += cmd
    with open('%s.%s.slurm' % (prefix, opts.software), 'w') as fh:
        fh.write(content)
    print('slurm file %s.%s.slurm has been created! ' %
          (prefix, opts.software))
示例#9
0
def vcf2hmp(args):
    """
    %prog vcf2hmp vcf
    convert vcf generated from beagle to hmp format using tassel
    """
    p = OptionParser(vcf2hmp.__doc__)
    p.set_slurm_opts(jn=True)
    p.add_option('--version',
                 default='2',
                 choices=('1', '2'),
                 help='specify the hmp type. 1: hyploid. 2: diploid')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    vcffile, = args
    prefix = '.'.join(vcffile.split('.')[0:-1])
    # version 2 exports diploid hapmap, version 1 haploid
    if opts.version == '2':
        tassel_cmd = '%s -Xms512m -Xmx10G -fork1 -vcf %s -export -exportType HapmapDiploid\n' % (tassel, vcffile)
    else:
        tassel_cmd = '%s -Xms512m -Xmx10G -fork1 -vcf %s -export -exportType Hapmap\n' % (tassel, vcffile)
    content = Slurm_header % (opts.time, opts.memory, opts.prefix,
                              opts.prefix, opts.prefix)
    content += 'module load java/1.8\n'
    content += tassel_cmd
    with open('%s.vcf2hmp.slurm' % prefix, 'w') as fh:
        fh.write(content)
    print(
        'slurm file %s.vcf2hmp.slurm has been created, you can submit your job file.'
        % prefix)
示例#10
0
def EstimateLD(args):
    """
    %prog dir_in dir_out
    run LD decay using tassel
    """
    p = OptionParser(EstimateLD.__doc__)
    p.set_slurm_opts(jn=True)
    p.add_option('--pattern', default='*vcf', help='pattern of vcf files')
    p.add_option('--window_size',
                 default='1000',
                 help='specify how many SNPs in the sliding window')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    dir_in, dir_out = args
    dir_out = Path(dir_out)
    if not dir_out.exists():
        dir_out.mkdir()
    # one slurm job per vcf that matches the pattern
    for vcf in Path(dir_in).glob(opts.pattern):
        prefix = vcf.name.replace('.vcf', '')
        ld_fn = '%s.ld' % prefix
        tassel_cmd = 'run_pipeline.pl -Xms512m -Xmx14g -fork1 -vcf %s -ld -ldWinSize %s -ldType SlidingWindow -td_tab %s/%s\n' % (
            vcf, opts.window_size, dir_out, ld_fn)
        # memory pinned at 15000 for LD jobs
        content = Slurm_header % (opts.time, 15000, prefix, prefix, prefix)
        content += 'ml java/1.8\n'
        content += 'ml tassel/5.2\n'
        content += tassel_cmd
        with open('%s.estLD.slurm' % prefix, 'w') as fh:
            fh.write(content)
        print(
            'slurm file %s.estLD.slurm has been created, you can submit your job file.'
            % prefix)
示例#11
0
def SummarizeLD(args):
    """
    %prog dir_in dir_out
    summarize LD decay in log scale
    """
    # bug fix: the parser was built from EstimateLD.__doc__, so '--help'
    # printed the wrong usage text for this command.
    p = OptionParser(SummarizeLD.__doc__)
    p.set_slurm_opts(jn=True)
    p.add_option('--pattern',
                 default='*.ld.txt',
                 help='pattern of ld.txt files')
    p.add_option('--max_dist',
                 default='1,000,000',
                 help='the maximum ld distance')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    dir_in, dir_out = args
    dir_out = Path(dir_out)
    if not dir_out.exists():
        dir_out.mkdir()
    # count of '0' digits in max_dist — presumably the order of magnitude
    # consumed by the summarizer; confirm against SummarizeLD in base.
    num0 = opts.max_dist.count('0')

    for fn in Path(dir_in).glob(opts.pattern):
        prefix = '.'.join(fn.name.split('.')[0:-1])
        out_fn = '%s.sum.csv' % prefix
        cmd = 'python -m schnablelab.SNPcalling.base SummarizeLD %s %s %s/%s\n' % (
            fn, num0, dir_out, out_fn)
        header = Slurm_header % (opts.time, opts.memory, prefix, prefix,
                                 prefix)
        header += cmd
        with open('%s.sumLD.slurm' % prefix, 'w') as f:
            f.write(header)
        print(
            'slurm file %s.sumLD.slurm has been created, you can submit your job file.'
            % prefix)
示例#12
0
def only_MAF(args):
    """
    %prog in_dir out_dir

    filter MAF
    """
    p = OptionParser(only_MAF.__doc__)
    p.set_slurm_opts(jn=True)
    p.add_option('--pattern',
                 default='*.vcf',
                 help='file pattern for vcf files in dir_in')
    p.add_option('--maf', default='0.01', help='maf cutoff')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    in_dir, out_dir, = args
    out_path = Path(out_dir)
    if not out_path.exists():
        # bug fix: the '%s' placeholder was never substituted
        sys.exit('%s does not exist...' % out_dir)
    # NOTE(review): out_dir is validated but never used in the generated
    # command — verify whether MAF writes into it implicitly.
    dir_path = Path(in_dir)
    vcfs = dir_path.glob(opts.pattern)
    for vcffile in vcfs:
        prefix = '.'.join(vcffile.name.split('.')[0:-1])
        cmd = "python -m schnablelab.SNPcalling.base MAF %s %s\n" % (vcffile,
                                                                     opts.maf)
        with open('%s.maf.slurm' % prefix, 'w') as f:
            header = Slurm_header % (opts.time, opts.memory, prefix, prefix,
                                     prefix)
            header += 'ml bcftools\n'
            header += cmd
            f.write(header)
            print(
                'slurm file %s.maf.slurm has been created, you can sbatch your job file.'
                % prefix)
示例#13
0
def pdf2png(args):
    """
    %prog pdf2png dir_in dir_out

    Run imagemagick to convert pdf to png
    """
    p = OptionParser(pdf2png.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())

    in_dir, out_dir, = args
    out_path = Path(out_dir)
    if not out_path.exists():
        # bug fix: the '%s' placeholder was never substituted
        sys.exit('%s does not exist...' % out_dir)
    dir_path = Path(in_dir)
    pdfs = dir_path.glob('*.pdf')
    for pdf in pdfs:
        print(pdf)
        prf = pdf.name.replace('.pdf', '')
        png = pdf.name.replace('.pdf', '.png')
        # conversion jobs use fixed resources (100h / 15000MB)
        header = Slurm_header % (100, 15000, prf, prf, prf)
        header += 'ml imagemagick\n'
        # rasterize at 300dpi, then shrink to 25% of that size
        cmd = 'convert -density 300 {} -resize 25% {}/{}\n'.format(
            pdf, out_path, png)
        header += cmd
        with open('pdf2png.%s.slurm' % prf, 'w') as f:
            f.write(header)
示例#14
0
def NUM_ALT(args):
    """
    %prog NUM_ALT vcf_or_vcf.gz
    only retain SNPs with only one ALT    
    """
    p = OptionParser(NUM_ALT.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())
    vcffile, = args
    prefix = vcffile.split('.')[0]
    new_f = prefix + '.alt1.vcf'
    # keep only records with exactly one ALT allele
    cmd = "bcftools view -i 'N_ALT=1' %s > %s" % (vcffile, new_f)
    jobfile = '%s.alt1.slurm' % prefix
    header = Slurm_header % (opts.time, opts.memory, opts.prefix, opts.prefix,
                             opts.prefix)
    # bug fix: 'bacftools' typo would make 'module load' fail on the node
    header += 'module load bcftools\n'
    header += cmd
    # bug fix: the original never closed the job-file handle
    with open(jobfile, 'w') as f:
        f.write(header)
    print(
        'slurm file %s.alt1.slurm has been created, you can sbatch your job file.'
        % prefix)
示例#15
0
def ped2bed(args):
    """
    %prog ped_prefix

    Convert plink ped to binary bed format using Plink
    """
    p = OptionParser(ped2bed.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    ped_prefix, = args
    plink_cmd = 'plink --noweb --file %s --make-bed --out %s\n' % (ped_prefix,
                                                                   ped_prefix)
    print('run cmd on local:\n%s' % plink_cmd)
    content = Slurm_header % (opts.time, opts.memory, opts.prefix,
                              opts.prefix, opts.prefix)
    content += 'ml plink\n'
    content += plink_cmd
    with open('%s.ped2bed.slurm' % ped_prefix, 'w') as fh:
        fh.write(content)
    print(
        'Job file has been created. You can submit: sbatch -p jclarke %s.ped2bed.slurm'
        % ped_prefix)
示例#16
0
def GLM(args):
    """
    %prog GLM GenoPrefix Pheno Outdir
    RUN automated GEMMA General Linear Model
    """
    p = OptionParser(GLM.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())
    GenoPrefix, Pheno, Outdir = args
    # GEMMA expects the mean-genotype and annotation companion files
    meanG = GenoPrefix + '.mean'
    annoG = GenoPrefix + '.annotation'
    outprefix = Pheno.split('.')[0]
    gemma_cmd = '%s -g %s -p %s -a %s -lm 4 -outdir %s -o %s' \
        %(gemma, meanG, Pheno, annoG, Outdir, outprefix)
    print('The command running on the local node:\n%s' % gemma_cmd)

    content = Slurm_header % (opts.time, opts.memory, opts.prefix,
                              opts.prefix, opts.prefix)
    content += gemma_cmd
    with open('%s.glm.slurm' % outprefix, 'w') as fh:
        fh.write(content)
    print('slurm file %s.glm.slurm has been created, you can sbatch your job file.'%outprefix)
示例#17
0
def SortHmp(args):
    """
    %prog SortHmp hmp

    Sort hmp in wired TASSEL way...
    """
    p = OptionParser(SortHmp.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmp, = args
    prefix = hmp.replace('.hmp', '')
    out_prefix = hmp.replace('.hmp', '') + '.sorted'
    sort_cmd = 'run_pipeline.pl -Xms16g -Xmx18g -SortGenotypeFilePlugin -inputFile %s -outputFile %s -fileType Hapmap\n' % (
        hmp, out_prefix)
    # TASSEL emits '<out>.hmp.txt'; rename it back to the '.hmp' convention
    rename_cmd = 'mv %s %s' % (out_prefix + '.hmp.txt', out_prefix + '.hmp')

    tmpl = Slurm_header
    tmpl += 'module load java/1.8\n'
    tmpl += 'module load  tassel/5.2\n'
    content = tmpl % (opts.time, opts.memory, opts.prefix, opts.prefix,
                      opts.prefix)
    content += sort_cmd
    content += rename_cmd
    with open('%s.Sort.slurm' % prefix, 'w') as fh:
        fh.write(content)
    print(
        'slurm file %s.Sort.slurm has been created, you can sbatch your job file.'
        % prefix)
示例#18
0
def MLM(args):
    """
    %prog MLM GenoPrefix('*.mean' and '*.annotation') Pheno Outdir
    RUN automated GEMMA Mixed Linear Model
    """
    p = OptionParser(MLM.__doc__)
    p.add_option('--kinship', default=False,
        help='specify the relatedness matrix file name')
    p.add_option('--pca', default=False,
        help='specify the principle components file name')
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())
    GenoPrefix, Pheno, Outdir = args
    # GEMMA companion files for the genotype prefix
    meanG = GenoPrefix + '.mean'
    annoG = GenoPrefix + '.annotation'
    # output stem: phenotype basename without its extension
    outprefix = '.'.join(Pheno.split('/')[-1].split('.')[0:-1])
    gemma_cmd = '%s -g %s -p %s -a %s -lmm 4 -outdir %s -o %s' \
        %(gemma, meanG, Pheno, annoG, Outdir, outprefix)
    # optional covariates: kinship matrix and principal components
    if opts.kinship:
        gemma_cmd += ' -k %s' % opts.kinship
    if opts.pca:
        gemma_cmd += ' -c %s' % opts.pca
    print('The command running on the local node:\n%s' % gemma_cmd)

    content = Slurm_header % (opts.time, opts.memory, opts.prefix,
                              opts.prefix, opts.prefix)
    content += gemma_cmd
    with open('%s.mlm.slurm' % outprefix, 'w') as fh:
        fh.write(content)
    print('slurm file %s.mlm.slurm has been created, you can sbatch your job file.'%outprefix)
示例#19
0
def hmp2vcf(args):
    """
    %prog hmp2vcf hmp
    convert hmp to vcf format using tassel
    """
    p = OptionParser(hmp2vcf.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmpfile, = args
    prefix = '.'.join(hmpfile.split('.')[0:-1])
    tassel_cmd = 'run_pipeline.pl -Xms512m -Xmx10G -fork1 -h %s -export -exportType VCF\n' % (
        hmpfile)
    print(tassel_cmd)
    content = Slurm_header % (opts.time, opts.memory, opts.prefix,
                              opts.prefix, opts.prefix)
    content += 'ml tassel/5.2\n'
    content += tassel_cmd
    with open('%s.hmp2vcf.slurm' % prefix, 'w') as fh:
        fh.write(content)
    print(
        'slurm file %s.hmp2vcf.slurm has been created, you can sbatch your job file.'
        % prefix)
示例#20
0
def farmcpu(args):
    """
    %prog farmcpu pheno(with header, tab delimited) geno_prefix(GM(chr must be nums) and GD prefix) PCA

    Run automated FarmCPU
    """
    p = OptionParser(farmcpu.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())

    pheno, geno_prefix, PCA = args
    # job/file stem: phenotype basename without its extension
    mem = '.'.join(pheno.split('/')[-1].split('.')[0:-1])
    # write the FarmCPU R script
    with open('%s.FarmCPU.R' % mem, 'w') as r_fh:
        r_fh.write(FarmCPU_header % (pheno, geno_prefix, geno_prefix, PCA,
                                     mem))

    # write the slurm script that loads R and batches the script above
    slurm_tmpl = Slurm_header
    slurm_tmpl += 'module load R/3.3\n'
    with open('%s.FarmCPU.slurm' % mem, 'w') as s_fh:
        s_fh.write(slurm_tmpl % (opts.time, opts.memory, opts.prefix,
                                 opts.prefix, opts.prefix))
        s_fh.write('R CMD BATCH %s.FarmCPU.R' % mem)
    print(
        'R script %s.FarmCPU.R and slurm file %s.FarmCPU.slurm has been created, you can sbatch your job file.'
        % (mem, mem))
示例#21
0
def keras_cnn(args):
    """
    %prog train_dir val_dir num_category model_name_prefix
    
    Run vgg model
    """
    p = OptionParser(keras_cnn.__doc__)
    p.add_option('--epoch', default=500, help='number of epoches')
    p.add_option('--lr_n', default=1, type='int',
        help='train model with differnt learning rates. if n=1: set lr to 0.001. if n>1: try differnt lr from 1e-2 to 1e-5 n times')
    p.set_slurm_opts(gpu=True)
    opts, args = p.parse_args(args)

    if len(args) != 4:
        sys.exit(not p.print_help())

    train_dir, val_dir, numC, mnp = args  # mnp:model name prefix
    # one GPU job per learning rate to try
    out_fns = fns(mnp, n=opts.lr_n)
    for idx in range(int(opts.lr_n)):
        model_name = out_fns.model_name[idx]
        train_cmd = 'python -m schnablelab.CNN.keras_vgg %s %s %s %s %s %s' % (
            train_dir, val_dir, numC, out_fns.lrs[idx], opts.epoch, model_name)
        content = Slurm_gpu_header % (opts.time, opts.memory, model_name,
                                      model_name, model_name)
        content += 'module load anaconda\nsource activate MCY\n'
        content += train_cmd
        with open('%s.slurm' % model_name, 'w') as fh:
            fh.write(content)
        print('slurm file %s.slurm has been created, you can sbatch your job file.' % model_name)
示例#22
0
def CallHeightBatch(args):
    """
    %prog imagePattern("CM*.polish.png")
    generate height call jobs for all polished image files
    """
    p = OptionParser(CallHeightBatch.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    pattern, = args
    # one slurm job per polished image matching the pattern
    for png in glob(pattern):
        out_prefix = png.split('/')[-1].split('.polish.png')[0]
        jobname = out_prefix + '.Height'
        call_cmd = 'python -m schnablelab.CNN.CallHeight CallHeight %s %s\n' % (
            png, out_prefix)
        content = Slurm_header % (opts.time, opts.memory, jobname, jobname,
                                  jobname)
        content += "ml anaconda\nsource activate %s\n" % opts.env
        content += call_cmd
        with open('%s.CallHeight.slurm' % out_prefix, 'w') as fh:
            fh.write(content)
        print('%s.CallHeight.slurm call height job file generated!' % jobname)
示例#23
0
def Sam2Bam(args):
    """
    %prog Sam2Bam dir
    Convert sam to bam format
    """
    p = OptionParser(Sam2Bam.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    # one slurm job per sam file in the target directory
    sams = [fn for fn in os.listdir(mydir) if fn.endswith('sam')]
    print('Total %s sam files' % len(sams))
    for sam in sams:
        sample = sam.split('.')[0]
        bam = '%s.bam' % sample
        content = Slurm_header % (opts.time, opts.memory, sample, sample,
                                  sample)
        content += 'module load samtools/0.1\n'
        content += 'samtools view -bS %s > %s\n' % (sam, bam)
        with open('%s.sam2bam.slurm' % sample, 'w') as fh:
            fh.write(content)
    print(
        'slurm files *.sam2bam.slurm has been created, you can sbatch your job file.'
    )
示例#24
0
def RunMACS2(args):
    """
    %prog species(bd, si, sb) out_prefix BAMs(separated by comma)
    call peaks using all bam files
    """
    p = OptionParser(RunMACS2.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    species, out_prefix, bams, = args
    all_bams = ' '.join([i for i in bams.split(',')])
    print('BAMS: %s' % all_bams)
    # per-species values passed to macs2 '-g' — presumably effective
    # genome sizes for bd/si/sb; confirm against the reference genomes.
    g_dict = {'bd': '2e8', 'si': '3e8', 'sb': '6e8'}
    cmd = 'macs2 callpeak -t %s -n %s --outdir %s -f BAM -q 0.01 -g %s -B --nomodel --shift 37 --extsize 73\n' % (
        all_bams, out_prefix, out_prefix, g_dict[species])
    header = Slurm_header % (opts.time, opts.memory, out_prefix, out_prefix,
                             out_prefix)
    header += 'module load macs2\n'
    header += cmd
    jobfile = '%s.macs2.slurm' % out_prefix
    with open(jobfile, 'w') as f:
        f.write(header)
    # bug fix: the '%s' in the message was never substituted
    print(
        'slurm files %s.macs2.slurm has been created, you can sbatch your job file.'
        % out_prefix)
示例#25
0
def SNPsCall(args):
    """
    %prog SNPsCall ref info
    generate per-region freebayes SNP-calling jobs for all sorted bam files
    """
    p = OptionParser(SNPsCall.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    ref, info, = args
    allfiles = [i for i in os.listdir('.') if i.endswith('sorted.bam')]
    print('Total %s sorted.bam files' % len(allfiles))
    # freebayes reads the bam list from this file (-L flag below)
    with open('bamfiles.fb.list', 'w') as f1:
        for i in allfiles:
            f1.write(i + '\n')

    # bug fix: the info file handle was never closed; use a context manager.
    # 'info' lists one region (e.g. chr:start-end) per line.
    with open(info) as f2:
        chrlist = [i.rstrip() for i in f2]
    for seq in chrlist:
        cmd = '/work/schnablelab/cmiao/SorghumGWAS/scripts/freebayes/bin/freebayes -r %s -f %s -C 1 -L bamfiles.fb.list > %s\n' % (
            seq, ref, "_".join(seq.split(':')) + '.vcf')
        header = Slurm_header % (opts.time, opts.memory, seq, seq, seq)
        header += cmd
        jobfile = '%s.fb.slurm' % ("_".join(seq.split(':')))
        with open(jobfile, 'w') as f:
            f.write(header)
    print(
        'slurm files *.fb.slurm has been created, you can sbatch your job file.'
    )
示例#26
0
def only_ALT(args):
    """
    %prog in_dir out_dir

    filter number of ALT using bcftools
    """
    p = OptionParser(only_ALT.__doc__)
    p.set_slurm_opts(jn=True)
    p.add_option('--pattern', default='*.vcf',
                 help='file pattern for vcf files in dir_in')
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    in_dir, out_dir, = args
    out_path = Path(out_dir)
    if not out_path.exists():
        # bug fix: the '%s' placeholder was never substituted
        sys.exit('%s does not exist...' % out_dir)
    dir_path = Path(in_dir)
    vcfs = dir_path.glob(opts.pattern)
    for vcffile in vcfs:
        # bug fix: original referenced undefined name 'vcf' (NameError);
        # the loop variable is 'vcffile'.
        prefix = '.'.join(vcffile.name.split('.')[0:-1])
        new_f = prefix + '.alt1.vcf'
        # keep only records with exactly one ALT allele
        cmd = "bcftools view -i 'N_ALT=1' %s > %s"%(vcffile, new_f)
        with open('%s.alt1.slurm'%prefix, 'w') as f:
            header = Slurm_header%(opts.time, opts.memory, prefix, prefix, prefix)
            # bug fix: 'bacftools' typo would make 'ml' fail on the node
            header += 'ml bcftools\n'
            header += cmd
            f.write(header)
            print('slurm file %s.alt1.slurm has been created, you can sbatch your job file.'%prefix)
示例#27
0
def Trim(args):
    """
    %prog Trim dir
    quality control on raw fq.gz using Trimmomatric
    """
    p = OptionParser(Trim.__doc__)
    p.set_slurm_opts(array=False)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args
    allfiles = [i for i in os.listdir(mydir) if i.endswith('.fq.gz')]
    print('Total %s fastq.gz files' % len(allfiles))
    for i in allfiles:
        sm = i.split('.')[0]
        # crop to 185bp, sliding-window quality trim, drop reads < 30bp
        cmd1 = 'java -jar $TM_HOME/trimmomatic.jar SE %s %s CROP:185 SLIDINGWINDOW:4:15 MINLEN:30' % (
            i, sm + '.trimed.fq\n')
        cmd2 = 'gzip %s' % (sm + '.trimed.fq\n')
        # bug fix: original used undefined name 'SM' (NameError on the
        # first file); the sample-name variable is 'sm'.
        header = Slurm_header % (opts.time, opts.memory, sm, sm, sm)
        header += cmd1
        header += cmd2
        jobfile = '%s.trimc.slurm' % sm
        with open(jobfile, 'w') as f:
            f.write(header)
    print(
        'slurm files *.trimed.slurm has been created, you can sbatch your job file.'
    )
示例#28
0
def hmp2ped(args):
    """
    %prog hmp

    Convert hmp to plink ped format using Tassel
    """
    p = OptionParser(hmp2ped.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    hmp, = args
    prefix = '.'.join(hmp.split('.')[0:-1])
    tassel_cmd = 'run_pipeline.pl -Xms512m -Xmx38G -fork1 -h %s -export -exportType Plink\n' % hmp
    content = Slurm_header % (opts.time, opts.memory, opts.prefix,
                              opts.prefix, opts.prefix)
    content += 'ml java/1.8\n'
    content += 'ml tassel/5.2\n'
    content += tassel_cmd
    with open('%s.hmp2ped.slurm' % prefix, 'w') as fh:
        fh.write(content)
    print(
        'Job file has been created. You can submit: sbatch -p jclarke %s.hmp2ped.slurm'
        % prefix)
示例#29
0
def PredictSlurmGPU(args):
    """
    %prog model_name npyPattern("CM*.npy") job_n
    generate prediction GPU jobs for all npy files
    """
    p = OptionParser(PredictSlurmGPU.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)
    if len(args) == 0:
        sys.exit(not p.print_help())
    mn, npy_pattern, jobn, = args
    # a real job-name prefix is required to keep slurm file names distinct
    if opts.prefix == 'myjob':
        print('specify job name prefix!')
        sys.exit()

    npys = glob(npy_pattern)
    print(len(npys))
    # NOTE(review): each job receives the full npy_pattern plus its index
    # range 'gn'; the per-group file list 'grp' itself is unused here —
    # presumably Predict re-globs and slices by the range. Confirm upstream.
    for gn, grp in cutlist(npys, int(jobn)):
        start, end = gn.split('-')
        # make the range end-exclusive -> end-inclusive for the job label
        end = int(end) + 1
        gn = '%s-%s' % (start, end)
        pred_cmd = "python -m schnablelab.CNN.Predict Predict %s '%s' %s\n" % (
            mn, npy_pattern, gn)
        job_prefix = '%s.%s' % (opts.prefix, gn)
        content = Slurm_gpu_header % (opts.time, opts.memory, job_prefix,
                                      job_prefix, job_prefix)
        content += "ml anaconda\nsource activate MCY\n"
        content += pred_cmd
        with open('%s.gpu.slurm' % job_prefix, 'w') as fh:
            fh.write(content)
        print('%s.gpu.slurm prediction GPU job file generated!' % job_prefix)
示例#30
0
def Preprocess(args):
    """
    %prog Preprocess dir
    1, Only keep variants: number of ALT==1, quality score >=10, MAF>=0.01, missing rate>0.3, type is snp. 
    2, split msnp to snps.
    only applicable on the unimputed vcf files.
    """
    p = OptionParser(Preprocess.__doc__)
    p.set_slurm_opts(jn=True)
    opts, args = p.parse_args(args)

    if len(args) == 0:
        sys.exit(not p.print_help())
    mydir, = args

    # NOTE(review): scans '.' rather than mydir — presumably meant to be
    # run from inside the target directory; confirm before changing.
    allfiles = [i for i in os.listdir('.') if i.endswith('.vcf')]
    print('Total %s .vcf files' % len(allfiles))
    for i in allfiles:
        SM = i.split('.')[0]
        # bug fix: the second pipeline stage was 'bcftools -m -snps',
        # which is not a valid bcftools invocation; splitting
        # multi-allelic records into SNPs is 'bcftools norm -m -snps'.
        cmd = "bcftools view -i 'N_ALT==1 && QUAL>=10 && MAF>=0.01 && NS/N_SAMPLES > 0.3' -v 'snps' %s | bcftools norm -m -snps > %s.prprcss.vcf" % (
            i, SM)
        jobfile = '%s.PreprocessVCF.slurm' % SM
        header = Slurm_header % (opts.time, opts.memory, SM, SM, SM)
        header += 'module load bcftools\n'
        header += cmd
        with open(jobfile, 'w') as f:
            f.write(header)
    print(
        'slurm file %s.PreprocessVCF.slurm has been created, now you can sbatch your job files.'
        % SM)