def qsub_a_command(cmd,
                   shell_script_name,
                   split_string=',',
                   memory_number='20G'):
    """Write ``cmd`` into a shell script, then run it locally or submit it.

    ``cmd`` is split on ``split_string`` and every piece becomes one line of
    the script at ``shell_script_name``.  The script is made executable
    (mode 711).  When ``my.f_get_server_name()`` reports ``'loire'`` the
    script is executed directly with ``sh``; on any other host it is
    submitted with ``qsub`` to queue ``shi.q``.

    Args:
        cmd: The commands to run, joined by ``split_string``.
        shell_script_name: Path of the script file to (over)write.  The qsub
            stdout/stderr files are this path plus ``.o`` / ``.e``.
        split_string: Separator between the individual commands in ``cmd``.
        memory_number: Value passed to qsub for both ``mem_free`` and
            ``h_vmem`` (e.g. ``'20G'``).

    Returns:
        None.  The exit status of the shell commands is not checked.
    """
    # The context manager closes the script even if a write fails, so the
    # handle is never leaked and the content is flushed before chmod/run.
    with open(shell_script_name, 'w') as script:
        # Single-argument call form prints identically on Python 2 and 3.
        print(shell_script_name)
        for line in cmd.split(split_string):
            script.write(line + '\n')

    os.system('chmod 711 ' + shell_script_name)

    if my.f_get_server_name() == 'loire':
        # Run the script directly on this host instead of queueing it.
        os.system('sh %s' % shell_script_name)
    else:
        # Same memory request for mem_free and h_vmem; run time limit
        # h_rt is fixed at 20:00:00.
        os.system(
            'qsub -V -q shi.q -l mem_free=%s -l h_vmem=%s -l h_rt=20:00:00'
            ' -o %s.o -e %s.e %s' % (memory_number, memory_number,
                                     shell_script_name, shell_script_name,
                                     shell_script_name))
# Example #2
# 0
    # Command-line branch: the enclosing `if` header and the construction of
    # `parser` lie outside this excerpt.  All three options are optional and
    # default to None.
    parser.add_argument('--batch_name', help="batch_name", default=None)
    parser.add_argument('--chr_str', help="chr", default=None)
    parser.add_argument('--value_type', help="value type", default=None)
    args = parser.parse_args()

    # Copy the parsed options into the module-level names used below.
    batch_name = args.batch_name
    chr_str = args.chr_str
    value_type = args.value_type
else:

    # Hard-coded settings used when the (not visible) condition is false.
    # Note that `args` is not bound on this path.
    batch_name = '445samples_region'
    chr_str = 'chr22'
    value_type = 'diff'

# On 'loire' the batch is pinned to the 445-sample region set; every other
# host takes it from the parsed command-line arguments.
# NOTE(review): `args` only exists when the argument-parsing branch above
# ran -- confirm that the non-'loire' path always goes through it.
batch_name = ('445samples_region'
              if my.f_get_server_name() == 'loire'
              else args.batch_name)

DEBUG = True

# Alternative input kept for reference:
#vcf_file = '%s/deepsea/tests/data/%s.merge.head.vcf.gz'%(project_dir, chr_str)
vcf_file = '%s/data/%s/chr_vcf_files/chrMerge2/%s.vcf.gz' % (
    project_dir, batch_name, chr_str)

# Read the gzipped, tab-separated, header-less table and keep its leading
# columns only.
# NOTE(review): `.ix[:, 0:5]` on integer column labels may be label-based
# (end-inclusive) -- confirm it yields exactly the five columns named below.
vcf_df = pd.io.parsers.read_csv(
    vcf_file,
    sep="\t",
    header=None,
    compression='gzip',
).ix[:, 0:5]
print(vcf_df.head())
vcf_df.columns = ['chr', 'pos', 'name', 'ref', 'alt']

# Swap underscores for dots in the batch label before it is spliced into
# the directory name.
new_batch = new_batch.replace('_', '.')

# Start from the directory-name template and substitute its placeholders one
# at a time.  The order matters because each replace sees the previous result.
loc_dir = 'rm.histone_model.cv.glmnet_add.penalty_population.None_new.batch.445samples.snyder.norm_batch.mode.TFMODE_other.info.normCor'
loc_dir = loc_dir.replace('TFMODE', target_mode)
loc_dir = loc_dir.replace('normCor', other_info)
loc_dir = loc_dir.replace('add.penalty', penalty_str)
loc_dir = loc_dir.replace('445samples.snyder.norm', new_batch)

print(penalty_str)
print(add_penalty)

# <project>/data/<batch>/rnaseq/<chromosome>/<loc_dir>
full_result_dir = '%s/data/%s/rnaseq/%s/%s' % (
    project_dir, batch_name, chr_str, loc_dir)
print(project_dir)
print(full_result_dir)

import time
# Reference timestamp taken before the wait loop starts.
start_time = time.time()

# Short interval (0.25) on 'loire' and on any host whose name contains
# 'clustdell'; long interval (120) everywhere else.
# NOTE(review): the unit is presumably seconds -- confirm where it is used.
if my.f_get_server_name() == 'loire' or 'clustdell' in my.f_get_server_name():
    time_interval = 0.25
else:
    time_interval = 120



# Loop that repeatedly looks for output belonging to `last_gene` inside
# `full_result_dir`.  The remainder of the loop body (including whatever
# exits it) is cut off in this excerpt.
while True:

    # Only search the result directory once it exists; until then treat the
    # output as missing.
    if os.path.exists(full_result_dir):
        # Pattern: the gene name, any characters, then a trailing 'enet'.
        # NOTE(review): presumably returns a list of matching files -- confirm
        # against my.f_grep_files_from_dir.
        gene_output = my.f_grep_files_from_dir(full_result_dir, '%s.*enet$' % last_gene)
    else:
        gene_output = []

    
    # No matching output found yet.
    if len(gene_output) == 0:
# Example #4
# 0
 def setUp(self):
     """Prepare fixture state: a new locker queue and the current server name."""
     fresh_queue = locker_queue()
     self.locker_list = fresh_queue
     current_host = my.f_get_server_name()
     self.server_name = current_host
# Example #5
# 0
import sys

sys.path.insert(0, lib_dir)
sys.path.insert(0, '%s/expression_var/python/' % home_dir)
import pandas as pd
import p_mymodule as my
from p_project_metadata import *

# Batch and chromosomes whose TF results are merged; earlier choices are
# kept as comments.
#batch_name = '800samples'
batch_name = '462samples'
#chr_num_list =[22, 10, 15]
chr_num_list = ['X']

# One merge command per value type, run in this order for every chromosome.
merge_templates = (
    'python2.7 p_merge_tf_results.py --batch_name %s --chr_str chr%s --value_type diff',
    'python2.7 p_merge_tf_results.py --batch_name %s --chr_str chr%s --value_type ref',
)

for chr_num in chr_num_list:
    for merge_template in merge_templates:
        cmd = merge_template % (batch_name, chr_num)
        my.f_shell_cmd(cmd)

# On host 'wqshi' the merged *.gz files are copied to a remote directory,
# which differs between the 800-sample batch and everything else.
# NOTE(review): '[email protected]' looks like a redacted user@host
# placeholder -- confirm the real scp destination before running.
if my.f_get_server_name() == 'wqshi':
    if batch_name == '800samples':
        scp_template = 'scp $HOME/expression_var/data/%s/deep_result/all/chrMergeTF/*.gz [email protected]:/homed/home/shi/expression_var/data/800samples/deep_result/all/chr800/diff/'
    else:
        scp_template = 'scp $HOME/expression_var/data/%s/deep_result/all/chrMergeTF/*.gz [email protected]:/homed/home/shi/expression_var/data/445samples_region/deep_result/all/chrMerge2/diff/'
    my.f_shell_cmd(scp_template % batch_name)
#Use homer to extract read density from bam files.
import os
home_dir = os.path.expanduser('~')
lib_dir = '%s/expression/python/' % home_dir
import sys
sys.path.append(lib_dir)
print(sys.path)
from p_project_metadata import *
import p_mymodule as my


# Choose the input directory (head_dir) and the scratch directory (node_dir)
# for the current host.
if my.f_get_server_name() != "loire":
    head_dir = "/home/shi/projects/expression_var/data/raw_data/tf/embl_data"
    # An earlier assignment of "/state/partition1/shi/tmp/" was immediately
    # overridden by this value, so only the effective one is kept.
    node_dir = '/raid6/shi/tmp/'
else:
    head_dir = "/homed/home/shi/anthony/tfbs_chipseq/ENCODE/dnase/"
    node_dir = "/homed/home/shi/anthony/tfbs_chipseq/ENCODE/dnase/node_dir"

# narrowPeak file name for each transcription factor.
tf_peak = {
    'PU1': 'haib-gm12878-pu1.narrowPeak',
    'RPB2': 'haib-gm12878-pol2.narrowPeak',
    'CTCF': 'sydh-gm12878-ctcf.narrowPeak',
}

# Factors selected for this run; the previous selection is kept as a comment.
#tf_list = ['RPB2', 'PU1']
tf_list = ['CTCF']


from joblib import Parallel, delayed
import multiprocessing

# Worker count for the (currently commented-out) Parallel call below; fixed
# at 2 instead of being derived from the CPU count.
num_cores = 2  # multiprocessing.cpu_count()-4
# Call form prints the same on Python 2 and also parses on Python 3.
print(num_cores)
#results = Parallel(n_jobs=num_cores)(delayed(process_one_sample)(sample_id, [new_chr_name]) for sample_id in sample_list)