# Create the working directories for this stage, then split the
# reference-aligned data into genomic chunks by running the external
# chunking executable with its output captured in a log file.
#
# NOTE: single-argument print(...) is used throughout; it behaves the same
# as the print statement under Python 2 and is also valid Python 3.

# working directories
chunk_dir = wd + '/chunks_for_imp'
imp_dir = wd + '/imp_sub'
os.mkdir(chunk_dir)
os.mkdir(imp_dir)
print('Genomic chunks: %s' % chunk_dir)
print('Imputation: %s' % imp_dir)


######################
print('\n...Creating genomic chunks for imputation...')
######################

os.chdir(chunk_dir)
link(str(shape_dir) + '/' + str(args.bfile) + '.hg19.ch.fl.bim',
     str(args.bfile) + '.hg19.ch.fl.bim',
     'reference-aligned bim file')

# create chunks
chunk_call = [chunker_ex,
              '--bfile', str(args.bfile) + '.hg19.ch.fl',
              '--out', str(args.out),
              addout_txt[0], addout_txt[1],
              '--Mb-size', str(args.Mb_size),
              '--snp-size', str(args.snp_size),
              '--chr-info-file', str(args.chr_info_file)]

# Drop falsy entries from the argument list (presumably the addout_txt
# slots are empty when no extra output tag was requested -- TODO confirm).
# list(...) keeps this a real list, since it is both joined for display
# and passed to check_call below.
chunk_call = list(filter(None, chunk_call))

# The context manager guarantees the log handle is closed even when
# check_call raises CalledProcessError on a non-zero exit status.
with open('chunk.' + str(outdot) + '.log', 'w') as chunk_log:
    print(' '.join(chunk_call) + '\n')
    # stderr is merged into stdout so the log holds all of the tool's output
    subprocess.check_call(chunk_call,
                          stderr=subprocess.STDOUT,
                          stdout=chunk_log)
fam.close() outfam.close() famtrans.close() print 'New fam file written to: %s\n' % (str(pi_dir) + '/' + str(outfam_file)) print 'Fam file translation written to: %s\n' % (str(pi_dir) + '/' + str(famtrans_file)) ###################### print '\n...Splitting by chromosome...' ###################### os.chdir(wd) os.mkdir('phase_chr') os.chdir(wd + '/phase_chr') link(pi_dir + '/' + str(args.bfile) + '.hg19.ch.fl.bed', str(args.bfile) + '.hg19.ch.fl.bed', 'aligned .bed file') link(pi_dir + '/' + str(args.bfile) + '.hg19.ch.fl.bim', str(args.bfile) + '.hg19.ch.fl.bim', 'aligned .bim file') link(pi_dir + '/' + str(args.bfile) + '.hg19.ch.fl.fam.idnum', str(args.bfile) + '.hg19.ch.fl.fam', 'aligned .fam file') link(pi_dir + '/' + str(args.bfile) + '.hg19.ch.fl.fam.transl', str(args.bfile) + '.hg19.ch.fl.fam.transl', 'fam number translation file') # TODO: handle empty chromosomes for i in xrange(1, 23): chr_log = open(str(outdot) + '.chr' + str(i) + '.log', 'w') chr_call = [ plinkx, '--bfile', str(args.bfile) + '.hg19.ch.fl', '--chr', str(i), '--make-bed', '--silent', '--memory', str(2000), '--out',
print '\n############' print '\n' print 'All jobs submitted.\n' exit(0) ###################### print '\n...Setting up working directory...' ###################### # working directories proc_dir = wd + '/imp_postproc' os.mkdir(proc_dir) print 'Impute2 output processing: %s' % proc_dir os.chdir(proc_dir) link( str(chunk_dir) + '/' + str(outdot) + '.chunks.txt', str(outdot) + '.chunks.txt', 'genomic chunk results') ###################### print '\n...Generating best-guess genotypes...' ###################### # TODO: flex queue/mem reqs uger_bg_template = """#!/usr/bin/env sh #$ -j y #$ -cwd #$ -V #$ -N {jname} #$ -q short #$ -l m_mem_free=4g #$ -t 1-{nchunk}
# Set up the QC working directory: create ./qc_<out>, move into it, link
# the input plink files there, and name the SNP / ID exclusion lists.
#
# NOTE: single-argument print(...) is used throughout; it behaves the same
# as the print statement under Python 2 and is also valid Python 3.

#############
qcdir = 'qc_' + str(args.out)
print('\n...Setting up working directory (./%s)...' % qcdir)
#############

# remember the launch directory; the link sources below are built from it
wd = os.getcwd()

# refuse to reuse an existing output directory
if os.path.isdir(qcdir):
    raise IOError('Output directory %s already exists. Stopping to prevent overwriting files.' % qcdir)

os.makedirs(qcdir)
os.chdir(qcdir)

# link plink files (with verification)
link(str(wd + '/' + args.bfile + '.bed'), str(args.bfile + '.bed'), 'input bed file')
link(str(wd + '/' + args.bfile + '.bim'), str(args.bfile + '.bim'), 'input bim file')
if args.skip_fid_tags:
    link(str(wd + '/' + args.bfile + '.fam'), str(args.bfile + '.fam'), 'input fam file')
else:
    # Linked under a '.fam.original' name when FID tagging is enabled
    # (presumably so a tagged .fam can take the plain name -- TODO confirm).
    link(str(wd + '/' + args.bfile + '.fam'), str(args.bfile + '.fam.original'), 'input fam file')

# setup SNP, ID QC fail lists
snpout_nam = args.out + '.exclude_snps.txt'
idout_nam = args.out + '.exclude_iids.txt'


#############
# NOTE(review): this excerpt ends here; the FID-tagging branch may continue
# beyond the visible text.
if not args.skip_fid_tags:
    print('\n...Preparing tags for FIDs...')
# Finish writing the renumbered fam files, then create ./phase_chr, link
# the aligned plink files into it, and build a per-chromosome plink
# command for chromosomes 1-22.
#
# NOTE: single-argument print(...) is used throughout; it behaves the same
# as the print statement under Python 2 and is also valid Python 3.

fam.close()
outfam.close()
famtrans.close()

print('New fam file written to: %s\n' % (str(pi_dir) + '/' + str(outfam_file)))
print('Fam file translation written to: %s\n' % (str(pi_dir) + '/' + str(famtrans_file)))


######################
print('\n...Splitting by chromosome...')
######################

os.chdir(wd)
os.mkdir('phase_chr')
os.chdir(wd + '/phase_chr')

# file stem shared by every aligned input and per-chromosome output below
aligned_stem = str(args.bfile) + '.hg19.ch.fl'

link(pi_dir + '/' + aligned_stem + '.bed', aligned_stem + '.bed', 'aligned .bed file')
link(pi_dir + '/' + aligned_stem + '.bim', aligned_stem + '.bim', 'aligned .bim file')
# the '.fam.idnum' file is linked under the plain '.fam' name so it is the
# fam file picked up by '--bfile' below
link(pi_dir + '/' + aligned_stem + '.fam.idnum', aligned_stem + '.fam', 'aligned .fam file')
link(pi_dir + '/' + aligned_stem + '.fam.transl', aligned_stem + '.fam.transl', 'fam number translation file')

# TODO: handle empty chromosomes
# range(1, 23) covers chromosomes 1..22 and iterates identically to xrange
for i in range(1, 23):
    chr_log = open(str(outdot) + '.chr' + str(i) + '.log', 'w')
    chr_call = [plinkx,
                '--bfile', aligned_stem,
                '--chr', str(i),
                '--make-bed',
                '--silent',
                '--memory', str(2000),
                '--out', aligned_stem + '.chr' + str(i)]
    # NOTE(review): this excerpt ends here; running chr_call and closing
    # chr_log are expected to follow in the remainder of the loop body.
print 'Begin!' print '############' ############# print '\n...Setting up working directory (%s)...' % str(args.outdir) ############# wd = os.getcwd() if not os.path.isdir(str(args.outdir)): os.makedirs(str(args.outdir)) os.chdir(args.outdir) # link plink files (with verification) link(str(wd + '/' + args.unrel_bfile + '.bed'), str(args.unrel_bfile + '.bed'), 'bed file for unrelated individuals') link(str(wd + '/' + args.unrel_bfile + '.bim'), str(args.unrel_bfile + '.bim'), 'bim file for unrelated individuals') link(str(wd + '/' + args.unrel_bfile + '.fam'), str(args.unrel_bfile + '.fam'), 'fam file for unrelated individuals') link(str(wd + '/' + args.target_bfile + '.bed'), str(args.target_bfile + '.bed'), 'bed file for target individuals') link(str(wd + '/' + args.target_bfile + '.bim'), str(args.target_bfile + '.bim'), 'bim file for target individuals') link(str(wd + '/' + args.target_bfile + '.fam'), str(args.target_bfile + '.fam'), 'fam file for target individuals') # link pca file, if provided if not (args.plot_admix_pca == None or args.plot_admix_pca == "None"):
print '\n' print '############' print 'Begin!' print '############' ###################### print '\n...Setting up working directory ./%s...' % str(outdir) ###################### wd = os.getcwd() os.mkdir(outdir) os.chdir(outdir) link(wd+'/'+str(args.bfile)+'.bed', str(args.bfile)+'.bed', 'input plink bed file') link(wd+'/'+str(args.bfile)+'.bim', str(args.bfile)+'.bim', 'input plink bim file') link(wd+'/'+str(args.bfile)+'.fam', str(args.bfile)+'.fam', 'input plink fam file') if str(args.strict_bfile) != str(args.bfile): link(wd+'/'+str(args.strict_bfile)+'.bed', str(args.strict_bfile)+'.bed', 'plink bed file for GRM') link(wd+'/'+str(args.strict_bfile)+'.bim', str(args.strict_bfile)+'.bim', 'plink bim file for GRM') link(wd+'/'+str(args.strict_bfile)+'.fam', str(args.strict_bfile)+'.fam', 'plink fam file for GRM') if args.covar is not None and str(args.covar) != "None": link(wd+'/'+str(args.covar), str(args.covar), 'covariate file') if args.pheno is not None and str(args.pheno) != "None": link(wd+'/'+str(args.pheno), str(args.pheno), 'phenotype file') if args.info_file is not None and str(args.info_file) != "None":
print '\n' print 'All jobs submitted.\n' exit(0) ###################### print '\n...Setting up working directory...' ###################### # working directories proc_dir = wd + '/imp_postproc' os.mkdir(proc_dir) print 'Impute2 output processing: %s' % proc_dir os.chdir(proc_dir) link(str(chunk_dir)+'/'+str(outdot)+'.chunks.txt', str(outdot)+'.chunks.txt', 'genomic chunk results') ###################### print '\n...Generating best-guess genotypes...' ###################### # TODO: flex queue/mem reqs uger_bg_template = """#!/usr/bin/env sh #$ -j y #$ -cwd #$ -V #$ -N {jname} #$ -q short #$ -l m_mem_free=4g #$ -t 1-{nchunk}
print '############' ############# print '\n...Setting up working directory (%s)...' % str(args.outdir) ############# wd = os.getcwd() if not os.path.isdir(str(args.outdir)): os.makedirs(str(args.outdir)) os.chdir(args.outdir) # link plink files (with verification) if run_admix or args.use_exemplars: link(str(wd + '/' + args.unrel_bfile + '.bed'), str(args.unrel_bfile + '.bed'), 'bed file for unrelated individuals') link(str(wd + '/' + args.unrel_bfile + '.bim'), str(args.unrel_bfile + '.bim'), 'bim file for unrelated individuals') link(str(wd + '/' + args.unrel_bfile + '.fam'), str(args.unrel_bfile + '.fam'), 'fam file for unrelated individuals') link(str(wd + '/' + args.target_bfile + '.bed'), str(args.target_bfile + '.bed'), 'bed file for target individuals') link(str(wd + '/' + args.target_bfile + '.bim'), str(args.target_bfile + '.bim'), 'bim file for target individuals') link(str(wd + '/' + args.target_bfile + '.fam'), str(args.target_bfile + '.fam'), 'fam file for target individuals') # link pca file, if provided if not (args.plot_admix_pca == None or args.plot_admix_pca == "None"): link(os.path.normpath(str(wd + '/' + args.plot_admix_pca)),
# Announce the start of the run, create (if needed) and enter the output
# directory, and link the unrelated/target plink filesets plus the optional
# PCA file into it.
#
# NOTE: single-argument print(...) is used throughout; it behaves the same
# as the print statement under Python 2 and is also valid Python 3.

print('Begin!')
print('############')


#############
print('\n...Setting up working directory (%s)...' % str(args.outdir))
#############

# remember the launch directory; the link sources below are built from it
wd = os.getcwd()

# an existing output directory is reused rather than treated as an error
if not os.path.isdir(str(args.outdir)):
    os.makedirs(str(args.outdir))

os.chdir(args.outdir)

# link plink files (with verification)
link(str(wd + '/' + args.unrel_bfile + '.bed'), str(args.unrel_bfile + '.bed'), 'bed file for unrelated individuals')
link(str(wd + '/' + args.unrel_bfile + '.bim'), str(args.unrel_bfile + '.bim'), 'bim file for unrelated individuals')
link(str(wd + '/' + args.unrel_bfile + '.fam'), str(args.unrel_bfile + '.fam'), 'fam file for unrelated individuals')

link(str(wd + '/' + args.target_bfile + '.bed'), str(args.target_bfile + '.bed'), 'bed file for target individuals')
link(str(wd + '/' + args.target_bfile + '.bim'), str(args.target_bfile + '.bim'), 'bim file for target individuals')
link(str(wd + '/' + args.target_bfile + '.fam'), str(args.target_bfile + '.fam'), 'fam file for target individuals')

# link pca file, if provided
# both a real None and the literal string "None" count as "not provided"
if not (args.plot_admix_pca is None or args.plot_admix_pca == "None"):
    link(str(wd + '/' + args.plot_admix_pca), str(args.plot_admix_pca), 'PCA file')


#############