def hisat_align(self, inputs, output, ref_basename, sample):
    '''Align fastq files with HISAT2 and write the result as a BAM file.

    The command pipes hisat2 output through "samtools view -bS" into
    `output`; no sorting step is part of this command. stderr from both
    programs is collected in a log file named after `output`.

    inputs -- when self.paired_end is true, a sequence whose first two
              items are the R1 and R2 fastq paths; otherwise the value is
              placed directly after hisat2's -U flag
    output -- path of the BAM file to write
    ref_basename -- value passed to hisat2 -x
    sample -- key into self.experiment.tr_dict, used for read group fields
    '''
    cores = self.get_stage_options("hisat", "cores")
    safe_make_dir(os.path.dirname(output))

    # Derive the log path from a literal ".bam" suffix only. A regex such as
    # ".bam$" would also match names like "x_bam" because the dot is a
    # wildcard.
    bam_suffix = ".bam"
    if output.endswith(bam_suffix):
        output_log = output[:-len(bam_suffix)] + ".log"
    else:
        # Never let the log path equal the BAM path, otherwise the shell
        # redirections in the command below would overwrite each other.
        output_log = output + ".log"

    # If PE fastq inputs, use hisat -1 and -2 arguments, else use -U
    if self.paired_end:
        fastq_input = "-1 {fastq_R1} -2 {fastq_R2}".format(
            fastq_R1=inputs[0], fastq_R2=inputs[1])
    else:
        fastq_input = "-U {fastq}".format(fastq=inputs)

    # Only pass --rna-strandness for the recognised strandness codes
    if self.experiment.stranded in ["FR", "RF", "F", "R"]:
        stranded = "--rna-strandness {}".format(self.experiment.stranded)
    else:
        stranded = ""

    # Get RG information
    info = self.experiment.tr_dict[sample]
    command = "hisat2 -p {n_threads} --dta {stranded} " \
              "--rg-id {sm}_{id}_{ln} --rg SM:{sm} " \
              "--rg LB:{lb} --rg PL:Illumina -x {ref_basename} " \
              "{fastq_input} 2> {output_log} | samtools view -bS - > " \
              "{output_bam} 2>> {output_log}" \
              "".format(n_threads=cores, stranded=stranded, id=info.id,
                        ln=info.lane, sm=info.sample_name, lb=info.library,
                        ref_basename=ref_basename, fastq_input=fastq_input,
                        output_bam=output, output_log=output_log)
    run_stage(self.state, "hisat", command)
def fastqc(self, input, outputs, fastqc_dir):
    '''Run FastQC on one or more fastq files, reporting into fastqc_dir'''
    safe_make_dir(fastqc_dir)
    # A tuple or list of fastq paths is flattened into one space-separated
    # argument string; any other value goes into the command unchanged.
    fastq_files = " ".join(input) if isinstance(input, (tuple, list)) else input
    template = "fastqc -o {fastqc_dir} -f fastq {fastq_input}"
    command = template.format(fastqc_dir=fastqc_dir, fastq_input=fastq_files)
    run_stage(self.state, "fastqc", command)
def create_hisat_index(self, inputs, outputs, hisat_basename):
    '''Build a HISAT2 index named hisat_basename from the genome fasta.

    inputs must unpack into exactly two items (genome fasta, gene GTF);
    only the fasta is placed on the hisat2-build command line.
    '''
    index_dir = os.path.dirname(hisat_basename)
    safe_make_dir(index_dir)
    # The GTF is unpacked so that a wrong number of inputs still fails here,
    # but it is not used by the command.
    genome_fa, _gene_gtf = inputs
    n_threads = self.get_stage_options("build_index", "cores")
    template = "hisat2-build -p {n_threads} {genome_fa} {basename}"
    command = template.format(n_threads=n_threads,
                              genome_fa=genome_fa,
                              basename=hisat_basename)
    run_stage(self.state, "build_index", command)
def create_star_index(self, inputs, outputs, output_dir):
    '''Generate a STAR genome index in output_dir.

    inputs -- two items: the genome fasta path and the gene GTF path
    outputs -- not used by this method
    output_dir -- directory passed to STAR --genomeDir; created first via
                  safe_make_dir
    '''
    safe_make_dir(output_dir)
    genome_fa, gene_gtf = inputs
    # Read the thread count from the same stage name the command is run
    # under ("build_index"), as create_hisat_index does, so --runThreadN
    # cannot disagree with that stage's own "cores" option.
    cores = self.get_stage_options("build_index", "cores")
    command = "STAR --runThreadN {n_threads} --runMode genomeGenerate " \
              "--genomeDir {output_dir} --genomeFastaFiles {genome_fa} " \
              "--sjdbGTFfile {gene_gtf}".format(n_threads=cores,
                                                output_dir=output_dir,
                                                genome_fa=genome_fa,
                                                gene_gtf=gene_gtf)
    run_stage(self.state, "build_index", command)
def star_align(self, inputs, output, ref_dir, sample):
    '''Align fastq files with STAR, producing an unsorted BAM.

    inputs -- when self.paired_end is true, an iterable of fastq paths that
              is joined with spaces; otherwise the value is placed directly
              after --readFilesIn
    output -- only its directory part is used: STAR output files are
              written there with the prefix "<sample>.star."
    ref_dir -- value passed to STAR --genomeDir
    sample -- sample name used in the output file prefix

    The command reads the fastq files through zcat (--readFilesCommand).
    '''
    output_dir = os.path.dirname(output)
    safe_make_dir(output_dir)
    # Read the thread count from the same stage name the command is run
    # under ("star"), mirroring how hisat_align uses "hisat" for both.
    cores = self.get_stage_options("star", "cores")
    # If PE fastq inputs, join into a string
    if self.paired_end:
        fastq_input = " ".join(inputs)
    else:
        fastq_input = inputs
    # os.path.join keeps the prefix relative when `output` has no directory
    # part; formatting "{output_dir}/..." would yield "/<sample>.star.",
    # i.e. a path in the filesystem root.
    output_prefix = os.path.join(output_dir, "{}.star.".format(sample))
    command = "STAR --runThreadN {cores} --genomeDir {ref_dir} " \
              "--readFilesIn {fastq_input} --readFilesCommand zcat " \
              "--outFileNamePrefix {output_prefix} " \
              "--outSAMtype BAM Unsorted " \
              "--outSAMunmapped Within " \
              "".format(cores=cores, ref_dir=ref_dir,
                        fastq_input=fastq_input,
                        output_prefix=output_prefix)
    run_stage(self.state, 'star', command)
def make_output_dirs(output_dict):
    '''Call safe_make_dir on every value of output_dict (one directory each)'''
    directories = output_dict.values()
    for directory in directories:
        safe_make_dir(directory)