def write_gemma_jobs(gemma_options, output_dir, logs_dir, input_dir,
                     genotype_fn, execute, batch_size=1):
    """Write, and optionally submit, GEMMA jobs for every phenotype file.

    Every entry returned by ``os.listdir(input_dir)`` is treated as a
    phenotype file.  For each one a command line is built with
    ``execs_commands.gemma`` and appended to a job script; a script is
    submitted through ``os.system`` once it holds ``batch_size`` commands.

    Args:
        gemma_options: Passed through unchanged to ``execs_commands.gemma``.
        output_dir: Output directory handed to ``execs_commands.gemma``;
            ensured via ``myos.check_if_directory_exists_create_it``.
        logs_dir: Logs directory handed to the bsub helper; ensured the
            same way as ``output_dir``.
        input_dir: Directory whose entries are the phenotype files.
        genotype_fn: Path of the genotype file used for every run.
        execute: When true, each completed job script is submitted.
        batch_size: Number of GEMMA commands written into one job script.
            Defaults to 1, the value that used to be hard-coded.

    Returns:
        Always 0.  This includes the early exit taken when a phenotype file
        or the genotype file does not exist.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A non-positive size would never reach the "batch is full" test.
        raise ValueError("batch_size must be at least 1")

    qname = "serial_requeue"
    mem_usage = "1000"
    myos.check_if_directory_exists_create_it(output_dir)
    myos.check_if_directory_exists_create_it(logs_dir)

    jobs_in_batch = 0  # commands already appended to the current script
    bsubcmd = None
    for pheno_n in os.listdir(input_dir):
        pheno_fn = os.path.join(input_dir, pheno_n)

        # Validate before asking the helper for a job script, so a missing
        # input does not leave a freshly requested script behind
        # (presumably the helper creates the script file -- TODO confirm).
        if not (os.path.exists(pheno_fn) and os.path.exists(genotype_fn)):
            print('Oooops, One of these files to process does not exist!!! %s %s' % (pheno_fn, genotype_fn))
            return 0

        # Each command gets its own prefix; reusing the first phenotype's
        # name for a whole batch would make the outputs collide.
        output_prefix = os.path.splitext(pheno_n)[0]
        if jobs_in_batch == 0:
            # The first phenotype of a batch names the job.
            bsubcmd = myos.write_bsub_string_no_rm_logs_dir(
                logs_dir, output_prefix, qname=qname, mem_usage=mem_usage,
                time='1438')

        exec_cmd = execs_commands.gemma(output_dir, gemma_options, pheno_fn,
                                        genotype_fn, output_prefix)
        print(exec_cmd)

        # The last space-separated token of the bsub command is used as the
        # path of the job script that the commands are appended to.
        job_script_fn = bsubcmd.split(' ')[-1]
        with open(job_script_fn, "a") as job_script_f:
            print(bsubcmd)
            job_script_f.write('echo \"%s\"\n' % (exec_cmd))
            job_script_f.write(exec_cmd + '\n')

        jobs_in_batch += 1
        if jobs_in_batch == batch_size:
            if execute:
                os.system(bsubcmd)
            jobs_in_batch = 0

    # Submit the trailing, partially filled batch (only possible when
    # batch_size > 1).
    if jobs_in_batch > 0 and execute:
        os.system(bsubcmd)
    return 0
def write_InferDPB_jobs(output_dir, logs_dir, input_dir, ncore='4', threadnum=4, execute=False):
    """Write, and optionally submit, one InferDPB job per (fn, fp, index).

    The function sweeps 5 ``fn`` values x 4 ``fp`` values x input indices
    1..5 (100 jobs).  For each combination it builds a command with
    ``execs_commands.inferDPB_fnfp``, appends it to the job script named by
    the bsub helper, and submits the script when ``execute`` is true.

    All input files are verified before any job script is requested, so a
    missing file can no longer abort the sweep after part of the jobs have
    already been written or submitted.

    Args:
        output_dir: Output directory handed to ``inferDPB_fnfp``; ensured via
            ``myos.check_if_directory_exists_create_it``.
        logs_dir: Logs directory handed to the bsub helper; ensured the same
            way as ``output_dir``.
        input_dir: Directory holding ``Drug_Sub``, ``Protein_Domain`` and the
            ``Drug_Protein_<i>`` / ``Sub_Domain_<i>`` files for i in 1..5.
        ncore: Forwarded to the bsub helper as ``ncores``.
        threadnum: Forwarded to ``inferDPB_fnfp``.
        execute: When true, every job script is submitted with ``os.system``.

    Returns:
        Always 0, including when an input file is missing.
    """
    qname = "serial_requeue"
    mem_usage = "1000"
    myos.check_if_directory_exists_create_it(output_dir)
    myos.check_if_directory_exists_create_it(logs_dir)

    fn_array = np.array([0.1, 0.2, 0.4, 0.8, 0.9], float)
    fp_array = np.array([0.0001, 0.001, 0.002, 0.01], float)

    # None of the input paths depend on fn/fp, so build them once.
    d2s = os.path.join(input_dir, 'Drug_Sub')
    p2d = os.path.join(input_dir, 'Protein_Domain')
    indexed_inputs = []
    for i in range(1, 6):
        d2p = os.path.join(input_dir, 'Drug_Protein_' + str(i))
        s2d_in = os.path.join(input_dir, 'Sub_Domain_' + str(i))
        indexed_inputs.append((i, d2p, s2d_in))

    # Fail fast: check every input before the first job script is requested.
    required = [d2s, p2d]
    for _, d2p, s2d_in in indexed_inputs:
        required.append(d2p)
        required.append(s2d_in)
    if not all(os.path.exists(path) for path in required):
        print("Cannot find some input files!")
        return 0

    # The counter is bumped before its first use, so (fn, fp) combinations
    # are numbered 2..21.  Kept as is because the number is part of the job
    # and output names.
    combo = 1
    for fn in fn_array:
        for fp in fp_array:
            combo += 1
            for i, d2p, s2d_in in indexed_inputs:
                suffix = str(combo) + '_' + str(i)
                outname = 'Sub_Domain_Result_' + suffix
                job_name = 's2d_job_' + suffix
                bsubcmd = myos.write_bsub_string_no_rm_logs_dir(
                    logs_dir, job_name, qname=qname, mem_usage=mem_usage,
                    ncores=ncore, time='1438')

                exec_cmd = execs_commands.inferDPB_fnfp(
                    output_dir, fn, fp, threadnum, d2s, p2d, d2p, s2d_in,
                    outname)
                print(exec_cmd)

                # The last space-separated token of the bsub command is used
                # as the path of the job script to append to.
                job_script_fn = bsubcmd.split(' ')[-1]
                with open(job_script_fn, 'a') as job_script_f:
                    print(bsubcmd)
                    job_script_f.write('echo \"%s\"\n' % (exec_cmd))
                    job_script_f.write(exec_cmd + '\n')

                if execute:
                    os.system(bsubcmd)
    return 0
def write_gemma_jobs(gemma_options, output_dir, logs_dir, input_dir,
                     genotype_fn, execute, batch_size=1):
    """Write, and optionally submit, GEMMA jobs for every phenotype file.

    Every entry returned by ``os.listdir(input_dir)`` is treated as a
    phenotype file.  For each one a command line is built with
    ``execs_commands.gemma`` and appended to a job script; a script is
    submitted through ``os.system`` once it holds ``batch_size`` commands.

    Args:
        gemma_options: Passed through unchanged to ``execs_commands.gemma``.
        output_dir: Output directory handed to ``execs_commands.gemma``;
            ensured via ``myos.check_if_directory_exists_create_it``.
        logs_dir: Logs directory handed to the bsub helper; ensured the
            same way as ``output_dir``.
        input_dir: Directory whose entries are the phenotype files.
        genotype_fn: Path of the genotype file used for every run.
        execute: When true, each completed job script is submitted.
        batch_size: Number of GEMMA commands written into one job script.
            Defaults to 1, the value that used to be hard-coded.

    Returns:
        Always 0.  This includes the early exit taken when a phenotype file
        or the genotype file does not exist.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A non-positive size would never reach the "batch is full" test.
        raise ValueError("batch_size must be at least 1")

    qname = "serial_requeue"
    mem_usage = "1000"
    myos.check_if_directory_exists_create_it(output_dir)
    myos.check_if_directory_exists_create_it(logs_dir)

    jobs_in_batch = 0  # commands already appended to the current script
    bsubcmd = None
    for pheno_n in os.listdir(input_dir):
        pheno_fn = os.path.join(input_dir, pheno_n)

        # Validate before asking the helper for a job script, so a missing
        # input does not leave a freshly requested script behind
        # (presumably the helper creates the script file -- TODO confirm).
        if not (os.path.exists(pheno_fn) and os.path.exists(genotype_fn)):
            print('Oooops, One of these files to process does not exist!!! %s %s' % (pheno_fn, genotype_fn))
            return 0

        # Each command gets its own prefix; reusing the first phenotype's
        # name for a whole batch would make the outputs collide.
        output_prefix = os.path.splitext(pheno_n)[0]
        if jobs_in_batch == 0:
            # The first phenotype of a batch names the job.
            bsubcmd = myos.write_bsub_string_no_rm_logs_dir(
                logs_dir, output_prefix, qname=qname, mem_usage=mem_usage,
                time='1438')

        exec_cmd = execs_commands.gemma(output_dir, gemma_options, pheno_fn,
                                        genotype_fn, output_prefix)
        print(exec_cmd)

        # The last space-separated token of the bsub command is used as the
        # path of the job script that the commands are appended to.
        job_script_fn = bsubcmd.split(' ')[-1]
        with open(job_script_fn, "a") as job_script_f:
            print(bsubcmd)
            job_script_f.write('echo \"%s\"\n' % (exec_cmd))
            job_script_f.write(exec_cmd + '\n')

        jobs_in_batch += 1
        if jobs_in_batch == batch_size:
            if execute:
                os.system(bsubcmd)
            jobs_in_batch = 0

    # Submit the trailing, partially filled batch (only possible when
    # batch_size > 1).
    if jobs_in_batch > 0 and execute:
        os.system(bsubcmd)
    return 0
def write_InferDPB_jobs(output_dir, logs_dir, input_dir, ncore="4", threadnum=4, execute=False):
    """Write, and optionally submit, one InferDPB job per (fn, fp, index).

    The function sweeps 5 ``fn`` values x 4 ``fp`` values x input indices
    1..5 (100 jobs).  For each combination it builds a command with
    ``execs_commands.inferDPB_fnfp``, appends it to the job script named by
    the bsub helper, and submits the script when ``execute`` is true.

    All input files are verified before any job script is requested, so a
    missing file can no longer abort the sweep after part of the jobs have
    already been written or submitted.

    Args:
        output_dir: Output directory handed to ``inferDPB_fnfp``; ensured via
            ``myos.check_if_directory_exists_create_it``.
        logs_dir: Logs directory handed to the bsub helper; ensured the same
            way as ``output_dir``.
        input_dir: Directory holding ``Drug_Sub``, ``Protein_Domain`` and the
            ``Drug_Protein_<i>`` / ``Sub_Domain_<i>`` files for i in 1..5.
        ncore: Forwarded to the bsub helper as ``ncores``.
        threadnum: Forwarded to ``inferDPB_fnfp``.
        execute: When true, every job script is submitted with ``os.system``.

    Returns:
        Always 0, including when an input file is missing.
    """
    qname = "serial_requeue"
    mem_usage = "1000"
    myos.check_if_directory_exists_create_it(output_dir)
    myos.check_if_directory_exists_create_it(logs_dir)

    fn_array = np.array([0.1, 0.2, 0.4, 0.8, 0.9], float)
    fp_array = np.array([0.0001, 0.001, 0.002, 0.01], float)

    # None of the input paths depend on fn/fp, so build them once.
    d2s = os.path.join(input_dir, "Drug_Sub")
    p2d = os.path.join(input_dir, "Protein_Domain")
    indexed_inputs = []
    for i in range(1, 6):
        d2p = os.path.join(input_dir, "Drug_Protein_" + str(i))
        s2d_in = os.path.join(input_dir, "Sub_Domain_" + str(i))
        indexed_inputs.append((i, d2p, s2d_in))

    # Fail fast: check every input before the first job script is requested.
    required = [d2s, p2d]
    for _, d2p, s2d_in in indexed_inputs:
        required.append(d2p)
        required.append(s2d_in)
    if not all(os.path.exists(path) for path in required):
        print("Cannot find some input files!")
        return 0

    # The counter is bumped before its first use, so (fn, fp) combinations
    # are numbered 2..21.  Kept as is because the number is part of the job
    # and output names.
    combo = 1
    for fn in fn_array:
        for fp in fp_array:
            combo += 1
            for i, d2p, s2d_in in indexed_inputs:
                suffix = str(combo) + "_" + str(i)
                outname = "Sub_Domain_Result_" + suffix
                job_name = "s2d_job_" + suffix
                bsubcmd = myos.write_bsub_string_no_rm_logs_dir(
                    logs_dir, job_name, qname=qname, mem_usage=mem_usage,
                    ncores=ncore, time="1438"
                )

                exec_cmd = execs_commands.inferDPB_fnfp(
                    output_dir, fn, fp, threadnum, d2s, p2d, d2p, s2d_in, outname
                )
                print(exec_cmd)

                # The last space-separated token of the bsub command is used
                # as the path of the job script to append to.
                job_script_fn = bsubcmd.split(" ")[-1]
                with open(job_script_fn, "a") as job_script_f:
                    print(bsubcmd)
                    job_script_f.write('echo "%s"\n' % (exec_cmd))
                    job_script_f.write(exec_cmd + "\n")

                if execute:
                    os.system(bsubcmd)
    return 0
def write_DECODE_jobs(logs_dir, input_dir, gene_residual_file, tissuenm, outdir, splitnum=20, execute=False):
    """Write, and optionally submit, ``splitnum`` gtex_decode jobs.

    The line count reported by ``myos.wccount`` for the ``genelocsnp`` file
    is divided by ``splitinteger`` into ``splitnum`` pairs.  For each pair a
    command is built with ``execs_commands.gtex_decode``, appended to the job
    script named by the bsub helper, and submitted when ``execute`` is true.

    The residual file is checked once, before any job script is requested;
    it does not change between iterations.

    Args:
        logs_dir: Logs directory handed to the bsub helper; ensured via
            ``myos.check_if_directory_exists_create_it``.
        input_dir: Prefix that ``"genelocsnp"`` is appended to (see the
            review note in the body).
        gene_residual_file: Path forwarded to ``gtex_decode``; must exist.
        tissuenm: Forwarded unchanged to ``gtex_decode``.
        outdir: Output directory forwarded to ``gtex_decode``; ensured the
            same way as ``logs_dir``.
        splitnum: Number of jobs to create.
        execute: When true, every job script is submitted with ``os.system``.

    Returns:
        Always 0, including when ``gene_residual_file`` is missing.
    """
    qname = "serial_requeue"
    mem_usage = "25000"
    myos.check_if_directory_exists_create_it(logs_dir)
    myos.check_if_directory_exists_create_it(outdir)

    # Loop-invariant precondition: verify it before creating any job.
    if not os.path.exists(gene_residual_file):
        print("Cannot find some input files!")
        return 0

    # NOTE(review): plain concatenation only yields "<dir>/genelocsnp" when
    # input_dir ends with a path separator.  Left unchanged because callers
    # may rely on it -- confirm before switching to os.path.join.
    genetotalnum = myos.wccount(input_dir + "genelocsnp")
    taskseq = splitinteger(genetotalnum, splitnum)

    for i in range(splitnum):
        # presumably the first/last item handled by this job -- confirm
        # against splitinteger.
        lo = taskseq[i][0]
        hi = taskseq[i][1]
        job_name = 'gtex_decode_' + str(lo) + '_' + str(hi)
        bsubcmd = myos.write_bsub_string_no_rm_logs_dir(
            logs_dir, job_name, qname=qname, mem_usage=mem_usage, time='300')

        exec_cmd = execs_commands.gtex_decode(gene_residual_file, lo, hi,
                                              tissuenm, outdir)
        print(exec_cmd)

        # The last space-separated token of the bsub command is used as the
        # path of the job script to append to.
        job_script_fn = bsubcmd.split(' ')[-1]
        with open(job_script_fn, 'a') as job_script_f:
            print(bsubcmd)
            job_script_f.write('echo \"%s\"\n' % (exec_cmd))
            job_script_f.write(exec_cmd + '\n')

        if execute:
            os.system(bsubcmd)
    return 0