def qsub_a_command(cmd, shell_script_name, split_string=',', memory_number='20G'):
    """Write *cmd* into a shell script and execute or submit it.

    cmd               : one string holding sub-commands joined by *split_string*;
                        each sub-command becomes one line of the script.
    shell_script_name : path of the shell script to create (also used as the
                        stem for the scheduler's .o/.e log files).
    split_string      : separator between sub-commands inside *cmd*.
    memory_number     : SGE memory request (mem_free and h_vmem).

    On the 'loire' host the script runs directly with sh; elsewhere it is
    submitted to the shi.q queue via qsub with a 20-hour runtime limit.
    """
    print(shell_script_name)
    # 'with' guarantees the script is flushed and closed before chmod/qsub/sh
    # touch it (the original used open/close and leaked on exception).
    with open(shell_script_name, 'w') as script_file:
        for sub_cmd in cmd.split(split_string):
            script_file.write(sub_cmd + '\n')
    os.system('chmod 711 ' + shell_script_name)
    if my.f_get_server_name() == 'loire':
        # No scheduler on loire -- run locally.
        os.system('sh %s' % shell_script_name)
    else:
        # Submit to SGE; stdout/stderr land next to the script (.o / .e).
        os.system('qsub -V -q shi.q -l mem_free=%s -l h_vmem=%s'
                  ' -l h_rt=20:00:00 -o %s.o -e %s.e %s'
                  % (memory_number, memory_number,
                     shell_script_name, shell_script_name, shell_script_name))
# NOTE(review): this fragment starts inside an if/else whose `if` header lies
# above this chunk -- presumably a command-line vs. hard-coded-debug switch.
    parser.add_argument('--batch_name', help="batch_name", default=None)
    parser.add_argument('--chr_str', help="chr", default=None)
    parser.add_argument('--value_type', help="value type", default=None)
    args = parser.parse_args()
    batch_name = args.batch_name
    chr_str = args.chr_str
    value_type = args.value_type
else:
    # Hard-coded defaults used when arguments are not parsed.
    batch_name = '445samples_region'
    chr_str = 'chr22'
    value_type = 'diff'

# Server-specific override: loire always uses the 445-sample region batch.
if my.f_get_server_name() == 'loire':
    batch_name = '445samples_region'
else:
    batch_name = args.batch_name

DEBUG = True

#vcf_file = '%s/deepsea/tests/data/%s.merge.head.vcf.gz'%(project_dir, chr_str)
# Load the leading columns of the merged, gzipped per-chromosome VCF.
# NOTE(review): .ix is deprecated label/position hybrid indexing -- confirm
# the 0:5 slice yields exactly the 5 columns named below.
vcf_file = '%s/data/%s/chr_vcf_files/chrMerge2/%s.vcf.gz' % (project_dir, batch_name, chr_str)
vcf_df = pd.io.parsers.read_csv(vcf_file, sep="\t", header=None, compression='gzip').ix[:, 0:5]
print vcf_df.head()
vcf_df.columns = ['chr', 'pos', 'name', 'ref', 'alt']
# NOTE(review): fragment -- the body of the final `if` continues past this chunk.
new_batch=new_batch.replace('_','.')
# Build the result-directory name by substituting tokens into a template
# (model mode, normalization info, penalty and batch name).
loc_dir = 'rm.histone_model.cv.glmnet_add.penalty_population.None_new.batch.445samples.snyder.norm_batch.mode.TFMODE_other.info.normCor'.replace('TFMODE', target_mode).replace('normCor', other_info).replace('add.penalty', penalty_str).replace('445samples.snyder.norm', new_batch)
print penalty_str
print add_penalty
full_result_dir = '%s/data/%s/rnaseq/%s/%s' % (project_dir, batch_name, chr_str, loc_dir)
print project_dir
print full_result_dir
import time
start_time = time.time()
# Poll slowly (2 min) on cluster hosts, quickly (0.25 s) on loire/clustdell.
if my.f_get_server_name() != 'loire' and 'clustdell' not in my.f_get_server_name():
    time_interval = 120
else:
    time_interval = 0.25
# Wait until an output file matching the last gene shows up in the result dir.
while True:
    if os.path.exists(full_result_dir):
        gene_output = my.f_grep_files_from_dir(full_result_dir, '%s.*enet$' % last_gene)
    else:
        gene_output = []
    if len(gene_output) == 0:
def setUp(self):
    """Per-test fixture: record the current host name and create a fresh
    locker queue for the test to use."""
    # The two fixtures are independent of one another.
    self.server_name = my.f_get_server_name()
    self.locker_list = locker_queue()
# Driver script: merge per-chromosome TF results for both value types, then
# copy the merged archives to the loire head node.
import sys
sys.path.insert(0, lib_dir)
sys.path.insert(0, '%s/expression_var/python/' % home_dir)
import pandas as pd
import p_mymodule as my
from p_project_metadata import *

#batch_name = '800samples'
batch_name = '462samples'
#chr_num_list =[22, 10, 15]
chr_num_list = ['X']

# One loop over value types replaces the previous copy-pasted pair of
# identical command lines; the generated commands are unchanged.
for chr_num in chr_num_list:
    for value_type in ['diff', 'ref']:
        cmd = ('python2.7 p_merge_tf_results.py --batch_name %s --chr_str chr%s'
               ' --value_type %s' % (batch_name, chr_num, value_type))
        my.f_shell_cmd(cmd)

# Ship the merged .gz files to loire; the destination path depends on batch.
if my.f_get_server_name() == 'wqshi':
    if batch_name == '800samples':
        my.f_shell_cmd(
            'scp $HOME/expression_var/data/%s/deep_result/all/chrMergeTF/*.gz [email protected]:/homed/home/shi/expression_var/data/800samples/deep_result/all/chr800/diff/' % (batch_name))
    else:
        my.f_shell_cmd(
            'scp $HOME/expression_var/data/%s/deep_result/all/chrMergeTF/*.gz [email protected]:/homed/home/shi/expression_var/data/445samples_region/deep_result/all/chrMerge2/diff/' % (batch_name))
#Use homer to extract read density from bam files.
import os
home_dir = os.path.expanduser('~')
lib_dir = '%s/expression/python/' % home_dir
import sys
sys.path.append(lib_dir)
print(sys.path)
from p_project_metadata import *
import p_mymodule as my

# Input (head) and scratch (node) directories depend on the execution host.
if (my.f_get_server_name() == "loire"):
    head_dir = "/homed/home/shi/anthony/tfbs_chipseq/ENCODE/dnase/"
    node_dir = "/homed/home/shi/anthony/tfbs_chipseq/ENCODE/dnase/node_dir"
else:
    head_dir = "/home/shi/projects/expression_var/data/raw_data/tf/embl_data"
    # The original assigned '/state/partition1/shi/tmp/' here and then
    # immediately overwrote it -- only this final value was ever used.
    node_dir = '/raid6/shi/tmp/'

# TF name -> ENCODE narrowPeak file for GM12878.
tf_peak = {'PU1': 'haib-gm12878-pu1.narrowPeak',
           'RPB2': 'haib-gm12878-pol2.narrowPeak',
           'CTCF': 'sydh-gm12878-ctcf.narrowPeak'}
#tf_list = ['RPB2', 'PU1']
tf_list = ['CTCF']

from joblib import Parallel, delayed
import multiprocessing
num_cores = 2  #multiprocessing.cpu_count()-4
print(num_cores)
#results = Parallel(n_jobs=num_cores)(delayed(process_one_sample)(sample_id, [new_chr_name]) for sample_id in sample_list)