def rsem(inputs, outputs):
    """Format the configured CMD_RSEM command for one sample and execute it.

    :params inputs: a tuple of 1 or 2 fastq.gz files, e.g.
        ('/path/to/rsem_output/homo_sapiens/GSE50599/GSM1224499/SRR968078_1.fastq.gz',
         '/path/to/rsem_output/homo_sapiens/GSE50599/GSM1224499/SRR968078_2.fastq.gz')
        Entries ending with '.sra2fastq.COMPLETE' are ignored.
    :params outputs: output files of this step; the last one is passed to
        misc.execute_log_stdout_stderr as the flag file.
    :raises ValueError: if no usable input is left after filtering, or if
        the directory of the first input does not match PATH_RE.
    """
    inputs = [_ for _ in inputs if not _.endswith('.sra2fastq.COMPLETE')]
    if not inputs:
        # fail with a clear message instead of an IndexError on inputs[0]
        raise ValueError('rsem: no fastq.gz inputs to process')

    # this is equivalent to the sample.outdir or GSM dir
    outdir = os.path.dirname(inputs[0])

    # the names of parameters are the same as that in gen_qsub_script, but
    # their values are more or less different, so better keep them separate
    fastq_gz_input = gen_fastq_gz_input(inputs)

    res = re.search(PATH_RE, outdir)
    if res is None:
        # re.search returns None on no match; don't let that surface as an
        # AttributeError on .group()
        raise ValueError(
            'rsem: cannot parse species/GSM from {0}'.format(outdir))
    species = res.group('species')
    gsm = res.group('GSM')

    reference_name = config['LOCAL_REFERENCE_NAMES'][species]
    sample_name = '{outdir}/{gsm}'.format(outdir=outdir, gsm=gsm)
    n_jobs = options.j_rsem
    flag_file = outputs[-1]

    cmd = config['CMD_RSEM'].format(
        n_jobs=n_jobs,
        fastq_gz_input=fastq_gz_input,
        reference_name=reference_name,
        sample_name=sample_name,
        output_dir=outdir)
    misc.execute_log_stdout_stderr(cmd, flag_file=flag_file,
                                   debug=options.debug)
def gen_qsub_script(inputs, outputs):
    """generate qsub script, usually named 0_submit.sh

    The script is rendered from the jinja2 template named by
    ``options.qsub_template`` and written as ``0_submit.sh`` into the
    directory of the first input file.

    :params inputs: fastq.gz files of one sample; entries ending with
        '.sra2fastq.COMPLETE' are ignored.
    :params outputs: not used by the code directly, but (like every other
        local name) exposed to the template through ``locals()``.
    :raises ValueError: if no usable input is left after filtering (e.g. the
        previous step didn't finish successfully), or if the directory of
        the first input does not match PATH_RE.
    """
    inputs = [_ for _ in inputs if not _.endswith('.sra2fastq.COMPLETE')]
    if not inputs:
        # the previous step probably didn't finish successfully; fail with a
        # clear message instead of an IndexError on inputs[0]
        raise ValueError('gen_qsub_script: no fastq.gz inputs to process')

    outdir = os.path.dirname(inputs[0])

    # only need the basename since the 0_submit.sh will be executed in the
    # GSM dir
    fastq_gz_input = gen_fastq_gz_input([os.path.basename(_) for _ in inputs])

    res = re.search(PATH_RE, outdir)
    if res is None:
        # re.search returns None on no match; don't let that surface as an
        # AttributeError on .group()
        raise ValueError(
            'gen_qsub_script: cannot parse GSE/species/GSM from {0}'.format(
                outdir))
    gse = res.group('GSE')
    species = res.group('species')
    gsm = res.group('GSM')

    reference_name = config['REMOTE_REFERENCE_NAMES'][species]
    sample_name = '{gsm}'.format(gsm=gsm)
    qsub_script = os.path.join(outdir, '0_submit.sh')

    # TEMPLATES_DIR: the standard templates directory
    # os.getcwd(): the current working directory
    # use both for looking for the template
    jinja2_env = Environment(
        loader=FileSystemLoader([TEMPLATES_DIR, os.getcwd()]))
    template = jinja2_env.get_template(options.qsub_template)

    # NOTE: all local names are handed to the template via locals(), so the
    # variable names above are part of the template contract -- don't rename
    # or remove them without checking the templates.
    # Render before opening the file so that a template error doesn't leave
    # a truncated 0_submit.sh behind.
    content = template.render(**locals())

    # the file is opened in binary mode, so write bytes rather than text
    with open(qsub_script, 'wb') as opf:
        opf.write(content.encode('utf-8'))
    logger.info('templated {0}'.format(qsub_script))
def gen_qsub_script(inputs, outputs):
    """generate qsub script, usually named 0_submit.sh

    The script is rendered from the jinja2 template named by
    ``options.qsub_template`` and written as ``0_submit.sh`` into the
    directory of the first input file.

    :params inputs: fastq.gz files of one sample; entries ending with
        '.sra2fastq.COMPLETE' are ignored.
    :params outputs: not used by the code directly, but (like every other
        local name) exposed to the template through ``locals()``.
    :raises ValueError: if no usable input is left after filtering (e.g. the
        previous step didn't finish successfully), or if the directory of
        the first input does not match PATH_RE.
    """
    inputs = [_ for _ in inputs if not _.endswith('.sra2fastq.COMPLETE')]
    if not inputs:
        # the previous step probably didn't finish successfully; fail with a
        # clear message instead of an IndexError on inputs[0]
        raise ValueError('gen_qsub_script: no fastq.gz inputs to process')

    outdir = os.path.dirname(inputs[0])

    # only need the basename since the 0_submit.sh will be executed in the
    # GSM dir
    fastq_gz_input = gen_fastq_gz_input(
        [os.path.basename(_) for _ in inputs])

    res = re.search(PATH_RE, outdir)
    if res is None:
        # re.search returns None on no match; don't let that surface as an
        # AttributeError on .group()
        raise ValueError(
            'gen_qsub_script: cannot parse GSE/species/GSM from {0}'.format(
                outdir))
    gse = res.group('GSE')
    species = res.group('species')
    gsm = res.group('GSM')

    reference_name = config['REMOTE_REFERENCE_NAMES'][species]
    sample_name = '{gsm}'.format(gsm=gsm)
    qsub_script = os.path.join(outdir, '0_submit.sh')

    # TEMPLATES_DIR: the standard templates directory
    # os.getcwd(): the current working directory
    # use both for looking for the template
    jinja2_env = Environment(loader=FileSystemLoader([TEMPLATES_DIR, os.getcwd()]))
    template = jinja2_env.get_template(options.qsub_template)

    # NOTE: all local names are handed to the template via locals(), so the
    # variable names above are part of the template contract -- don't rename
    # or remove them without checking the templates.
    # Render before opening the file so that a template error doesn't leave
    # a truncated 0_submit.sh behind.
    content = template.render(**locals())

    # the file is opened in binary mode, so write bytes rather than text
    with open(qsub_script, 'wb') as opf:
        opf.write(content.encode('utf-8'))
    logger.info('templated {0}'.format(qsub_script))
def rsem(inputs, outputs):
    """Format the configured CMD_RSEM command for one sample and execute it.

    :params inputs: a tuple of 1 or 2 fastq.gz files, e.g.
        ('/path/to/rsem_output/homo_sapiens/GSE50599/GSM1224499/SRR968078_1.fastq.gz',
         '/path/to/rsem_output/homo_sapiens/GSE50599/GSM1224499/SRR968078_2.fastq.gz')
        Entries ending with '.sra2fastq.COMPLETE' are ignored.
    :params outputs: output files of this step; the last one is passed to
        misc.execute_log_stdout_stderr as the flag file.
    :raises ValueError: if no usable input is left after filtering, or if
        the directory of the first input does not match PATH_RE.
    """
    inputs = [_ for _ in inputs if not _.endswith('.sra2fastq.COMPLETE')]
    if not inputs:
        # fail with a clear message instead of an IndexError on inputs[0]
        raise ValueError('rsem: no fastq.gz inputs to process')

    # this is equivalent to the sample.outdir or GSM dir
    outdir = os.path.dirname(inputs[0])

    # the names of parameters are the same as that in gen_qsub_script, but
    # their values are more or less different, so better keep them separate
    fastq_gz_input = gen_fastq_gz_input(inputs)

    res = re.search(PATH_RE, outdir)
    if res is None:
        # re.search returns None on no match; don't let that surface as an
        # AttributeError on .group()
        raise ValueError(
            'rsem: cannot parse species/GSM from {0}'.format(outdir))
    species = res.group('species')
    gsm = res.group('GSM')

    reference_name = config['LOCAL_REFERENCE_NAMES'][species]
    sample_name = '{outdir}/{gsm}'.format(outdir=outdir, gsm=gsm)
    n_jobs = options.j_rsem
    flag_file = outputs[-1]

    cmd = config['CMD_RSEM'].format(
        n_jobs=n_jobs,
        fastq_gz_input=fastq_gz_input,
        reference_name=reference_name,
        sample_name=sample_name,
        output_dir=outdir)
    misc.execute_log_stdout_stderr(cmd, flag_file=flag_file,
                                   debug=options.debug)
def test_gen_fastq_gz_input(self):
    """Check rsem.gen_fastq_gz_input against a table of known inputs.

    Covers one _1/_2 pair, two runs with _1/_2 files each, a lone _1 file,
    a lone _2 file, and finally a name for which None is expected.
    """
    # (input file list, expected command-line fragment), checked in order
    expectations = [
        (
            ['path/to/SRR000000_1.fastq.gz',
             'path/to/SRR000000_2.fastq.gz'],
            '--paired-end <(/bin/zcat path/to/SRR000000_1.fastq.gz) <(/bin/zcat path/to/SRR000000_2.fastq.gz)',
        ),
        (
            ['path/to/SRR000000_1.fastq.gz',
             'path/to/SRR111111_1.fastq.gz',
             'path/to/SRR000000_2.fastq.gz',
             'path/to/SRR111111_2.fastq.gz'],
            ('--paired-end '
             '<(/bin/zcat path/to/SRR000000_1.fastq.gz path/to/SRR111111_1.fastq.gz) '
             '<(/bin/zcat path/to/SRR000000_2.fastq.gz path/to/SRR111111_2.fastq.gz)'),
        ),
        (
            ['path/to/SRR000000_1.fastq.gz'],
            '<(/bin/zcat path/to/SRR000000_1.fastq.gz)',
        ),
        (
            ['path/to/SRR000000_2.fastq.gz'],
            '<(/bin/zcat path/to/SRR000000_2.fastq.gz)',
        ),
    ]
    for fastq_gz_files, expected in expectations:
        actual = rsem.gen_fastq_gz_input(fastq_gz_files)
        self.assertEqual(actual, expected)

    # a name that is not a recognizable fastq.gz file yields None
    invalid_result = rsem.gen_fastq_gz_input(['invalid_fastq_gz_name'])
    self.assertIsNone(invalid_result)