def after_genome_download(self, genome, threads=1, force=False):
    """Build a GMAP index for a downloaded genome.

    Does nothing when ``gmap_build`` is rejected by ``cmd_ok`` or when the
    index directory already exists (unless ``force`` is set, in which case
    the existing index directory is removed first).

    Args:
        genome: object exposing ``plugin["gmap"]["index_dir"]``, ``filename``
            and ``name``.
        threads: accepted for signature parity with the other plugins; it is
            not used by this method.
        force: when true, remove any existing index and rebuild it.
    """
    if not cmd_ok("gmap_build"):
        return

    index_dir = genome.plugin["gmap"]["index_dir"]
    if force:
        # Start from scratch
        rm_rf(index_dir)

    if os.path.exists(index_dir):
        return

    # unzip genome if zipped and return up-to-date genome name
    fname, bgzip = gunzip_and_name(genome.filename)
    try:
        # gmap outputs a folder named genome.name;
        # its content is moved to index_dir, consistent with other plugins
        tmp_dir = mkdtemp(dir=".")
        try:
            cmd = f"gmap_build -D {tmp_dir} -d {genome.name} {fname}"
            run_index_cmd("gmap", cmd)

            src = os.path.join(tmp_dir, genome.name)
            move(src, index_dir)
        finally:
            # Always drop the scratch directory, even if the build or the
            # move failed, so no temp folders pile up in the working dir.
            rm_rf(tmp_dir)
    finally:
        # Always restore the genome's compression state, even on failure.
        bgzip_and_name(fname, bgzip)
def after_genome_download(self, genome, force=False):
    """Build a STAR index for a downloaded genome.

    Does nothing when ``STAR`` is rejected by ``cmd_ok``. The index is only
    generated when the ``index_name`` path does not exist yet; ``force``
    removes the index directory first so the index is rebuilt.

    Args:
        genome: object exposing ``props["star"]`` (with ``index_dir`` and
            ``index_name``) and ``filename``.
        force: when true, remove any existing index and rebuild it.

    Raises:
        subprocess.CalledProcessError: if ``gunzip`` or ``bgzip`` exits with
            a non-zero status.
    """
    if not cmd_ok("STAR"):
        return

    index_dir = genome.props["star"]["index_dir"]
    index_name = genome.props["star"]["index_name"]
    if force:
        # Start from scratch
        rmtree(index_dir, ignore_errors=True)
    mkdir_p(index_dir)

    if os.path.exists(index_name):
        return

    # If the genome is bgzipped it needs to be unzipped first.
    # check_call raises on a non-zero exit status, so no return-code
    # inspection is needed.
    fname = genome.filename
    bgzip = fname.endswith(".gz")
    if bgzip:
        sp.check_call(["gunzip", fname])
        fname = fname[: -len(".gz")]

    try:
        cmd = (
            "STAR --runMode genomeGenerate --genomeFastaFiles {} --genomeDir {} --outFileNamePrefix {}"
        ).format(fname, index_dir, index_dir)
        run_index_cmd("star", cmd)
    finally:
        # Rezip the genome if it was bgzipped, even when indexing failed.
        # bgzip is distributed with tabix/htslib.
        if bgzip:
            sp.check_call(["bgzip", fname])
def after_genome_download(self, genome, force=False):
    """Build a HISAT2 index for a downloaded genome.

    Does nothing when ``hisat2-build`` is rejected by ``cmd_ok``. The index
    is only generated when the index directory holds no ``.ht2`` file;
    ``force`` removes the index directory first so the index is rebuilt.

    Args:
        genome: object exposing ``props["hisat2"]`` (with ``index_dir`` and
            ``index_name``) and ``filename``.
        force: when true, remove any existing index and rebuild it.

    Raises:
        subprocess.CalledProcessError: if ``gunzip`` or ``bgzip`` exits with
            a non-zero status.
    """
    if not cmd_ok("hisat2-build"):
        return

    index_dir = genome.props["hisat2"]["index_dir"]
    index_name = genome.props["hisat2"]["index_name"]
    if force:
        # Start from scratch
        rmtree(index_dir, ignore_errors=True)
    mkdir_p(index_dir)

    if any(entry.endswith(".ht2") for entry in os.listdir(index_dir)):
        return

    # If the genome is bgzipped it needs to be unzipped first.
    # check_call raises on a non-zero exit status, so no return-code
    # inspection is needed.
    fname = genome.filename
    bgzip = fname.endswith(".gz")
    if bgzip:
        sp.check_call(["gunzip", fname])
        fname = fname[: -len(".gz")]

    try:
        cmd = "hisat2-build {} {}".format(fname, index_name)
        run_index_cmd("hisat2", cmd)
    finally:
        # Rezip the genome if it was bgzipped, even when indexing failed.
        # bgzip is distributed with tabix/htslib.
        if bgzip:
            sp.check_call(["bgzip", fname])
def after_genome_download(self, genome):
    """Build a GMAP index for a downloaded genome.

    Does nothing when ``gmap_build`` is rejected by ``cmd_ok``. A genome
    whose filename ends in ``.gz`` is gunzipped before indexing and
    bgzipped again afterwards.

    Args:
        genome: object exposing ``props["gmap"]["index_dir"]``, ``filename``
            and ``name``.

    Raises:
        subprocess.CalledProcessError: if ``gunzip`` or ``bgzip`` exits with
            a non-zero status.
    """
    if not cmd_ok("gmap_build"):
        return

    index_dir = genome.props["gmap"]["index_dir"]
    mkdir_p(index_dir)

    # If the genome is bgzipped it needs to be unzipped first.
    # check_call raises on a non-zero exit status, so no return-code
    # inspection is needed.
    fname = genome.filename
    bgzip = fname.endswith(".gz")
    if bgzip:
        sp.check_call(["gunzip", fname])
        fname = fname[: -len(".gz")]

    try:
        # Index the (possibly unzipped) file: after gunzip the original
        # ``genome.filename`` path no longer exists on disk.
        cmd = "gmap_build -D {} -d {} {}".format(index_dir, genome.name, fname)
        run_index_cmd("gmap", cmd)
    finally:
        # Rezip the genome if it was bgzipped, even when indexing failed.
        # bgzip is distributed with tabix/htslib.
        if bgzip:
            sp.check_call(["bgzip", fname])
def after_genome_download(self, genome, threads=1, force=False):
    """Generate a STAR index, splice-aware when a GTF annotation exists.

    Returns early when ``STAR`` is rejected by ``cmd_ok`` or when the
    ``index_name`` path already exists and ``force`` is not set. Otherwise
    the index directory is wiped, recreated and filled by
    ``STAR --runMode genomeGenerate``.

    Args:
        genome: object exposing ``plugin["star"]``, ``filename`` and
            ``annotation_gtf_file``.
        threads: value passed to STAR's ``--runThreadN``.
        force: rebuild even if the index already exists.
    """
    index_name = genome.plugin["star"]["index_name"]
    if not cmd_ok("STAR"):
        return
    if os.path.exists(index_name) and not force:
        return

    index_dir = genome.plugin["star"]["index_dir"]
    rm_rf(index_dir)
    mkdir_p(index_dir)

    # extracted_file yields the name of the genome file to index
    with extracted_file(genome.filename) as fasta:
        star_cmd = (
            f"STAR --runMode genomeGenerate --runThreadN {threads} "
            f"--genomeFastaFiles {fasta} --genomeDir {index_dir} "
            f"--outFileNamePrefix {index_dir}"
        )

        annotation = genome.annotation_gtf_file
        if not annotation:
            logger.info("Creating STAR index without annotation file.")
            run_index_cmd("star", star_cmd)
            return

        # an annotation is present: generate a splice-aware index
        with extracted_file(annotation) as gtf:
            run_index_cmd("star", star_cmd + f" --sjdbGTFfile {gtf}")
def after_genome_download(self, genome):
    """Create a minimap2 index for the downloaded genome.

    Nothing happens when ``minimap2`` is rejected by ``cmd_ok``.

    Args:
        genome: object exposing ``props["minimap2"]`` (with ``index_dir``
            and ``index_name``) and ``filename``.
    """
    if cmd_ok("minimap2"):
        idx_dir = genome.props["minimap2"]["index_dir"]
        idx_name = genome.props["minimap2"]["index_name"]

        # make sure the output directory exists before indexing
        mkdir_p(idx_dir)
        run_index_cmd(
            "minimap2",
            "minimap2 -d {} {}".format(idx_name, genome.filename),
        )
def after_genome_download(self, genome):
    """Create a bowtie2 index for the downloaded genome.

    Nothing happens when ``bowtie2-build`` is rejected by ``cmd_ok``.

    Args:
        genome: object exposing ``props["bowtie2"]`` (with ``index_dir``
            and ``index_name``) and ``filename``.
    """
    if cmd_ok("bowtie2-build"):
        idx_dir = genome.props["bowtie2"]["index_dir"]
        idx_name = genome.props["bowtie2"]["index_name"]

        # make sure the output directory exists before indexing
        mkdir_p(idx_dir)
        run_index_cmd(
            "bowtie2",
            "bowtie2-build {} {}".format(genome.filename, idx_name),
        )
def after_genome_download(self, genome, threads=1, force=False):
    """Build a HISAT2 index, splice-aware when a GTF annotation exists.

    Returns early when ``hisat2-build`` is rejected by ``cmd_ok`` or when
    ``<index_name>.1.ht2`` already exists and ``force`` is not set.
    Otherwise the index directory is wiped and rebuilt. When the genome has
    an annotation, splice-site and exon files are written to
    ``genome.genome_dir`` and passed to ``hisat2-build``.

    Args:
        genome: object exposing ``plugin["hisat2"]``, ``filename``,
            ``annotation_gtf_file`` and ``genome_dir``.
        threads: value passed to ``hisat2-build -p``.
        force: rebuild even if the index already exists.

    Raises:
        subprocess.CalledProcessError: if one of the HISAT2 extraction
            scripts exits with a non-zero status.
    """
    import shutil

    index_name = genome.plugin["hisat2"]["index_name"]
    if not cmd_ok("hisat2-build") or (
        os.path.exists(f"{index_name}.1.ht2") and not force
    ):
        return

    index_dir = genome.plugin["hisat2"]["index_dir"]
    rm_rf(index_dir)
    mkdir_p(index_dir)

    # gunzip genome if bgzipped and return up-to-date genome name
    fname, bgzip = gunzip_and_name(genome.filename)
    try:
        # index command
        cmd = f"hisat2-build -p {threads} {fname} {index_name}"

        # if an annotation is present, generate a splice-aware index
        gtf_file = genome.annotation_gtf_file
        if gtf_file:
            # gunzip if gzipped
            gtf_file, gzip_file = gunzip_and_name(gtf_file)
            try:
                # The helper scripts sit next to the hisat2 executable and
                # share its name as a prefix. shutil.which replaces the
                # former `which` subprocess, whose pipe was never waited on
                # or closed; "" keeps the old result when hisat2 is absent.
                hisat_path = shutil.which("hisat2") or ""

                # generate splice and exon site files to enhance indexing
                splice_script = hisat_path + "_extract_splice_sites.py"
                splice_file = os.path.join(genome.genome_dir, "splice_sites.txt")
                sp.check_call(
                    f"python3 {splice_script} {gtf_file} > {splice_file}",
                    shell=True,
                )

                exon_script = hisat_path + "_extract_exons.py"
                exon_file = os.path.join(genome.genome_dir, "exon_sites.txt")
                sp.check_call(
                    f"python3 {exon_script} {gtf_file} > {exon_file}",
                    shell=True,
                )
            finally:
                # re-gzip annotation if gunzipped, even if extraction failed
                gzip_and_name(gtf_file, gzip_file)

            # update index command with annotation
            cmd += f" --ss {splice_file} --exon {exon_file}"
        else:
            print("\nCreating Hisat2 index without annotation file.")

        # Create index
        run_index_cmd("hisat2", cmd)
    finally:
        # re-bgzip genome if gunzipped, even if indexing failed
        bgzip_and_name(fname, bgzip)
def after_genome_download(self, genome):
    """Build a GMAP index for a downloaded genome.

    Does nothing when ``gmap_build`` is rejected by ``cmd_ok``. The index is
    written by ``gmap_build`` under ``index_dir`` using ``genome.name`` as
    the database name.

    Args:
        genome: object exposing ``props["gmap"]["index_dir"]``, ``name`` and
            ``filename``.
    """
    if not cmd_ok("gmap_build"):
        return

    # Create index dir
    index_dir = genome.props["gmap"]["index_dir"]
    mkdir_p(index_dir)

    # Create index
    cmd = "gmap_build -D {} -d {} {}".format(
        index_dir, genome.name, genome.filename
    )
    run_index_cmd("gmap", cmd)
def after_genome_download(self, genome):
    """Build a GMAP index for a downloaded genome.

    Does nothing when ``gmap_build`` is rejected by ``cmd_ok``. The index is
    written by ``gmap_build`` under ``index_dir`` using ``genome.name`` as
    the database name.

    Args:
        genome: object exposing ``props["gmap"]["index_dir"]``, ``name`` and
            ``filename``.
    """
    if not cmd_ok("gmap_build"):
        return

    # Create index dir
    index_dir = genome.props["gmap"]["index_dir"]
    mkdir_p(index_dir)

    # Create index
    cmd = "gmap_build -D {} -d {} {}".format(index_dir, genome.name, genome.filename)
    run_index_cmd("gmap", cmd)
def after_genome_download(self, genome):
    """Create a BWA index for the downloaded genome.

    Nothing happens when ``bwa`` is rejected by ``cmd_ok``. The genome file
    is symlinked to the ``index_name`` path (unless that path already
    exists) and ``bwa index`` is run on that path.

    Args:
        genome: object exposing ``props["bwa"]`` (with ``index_dir`` and
            ``index_name``) and ``filename``.
    """
    if cmd_ok("bwa"):
        idx_dir = genome.props["bwa"]["index_dir"]
        linked_fasta = genome.props["bwa"]["index_name"]
        mkdir_p(idx_dir)

        # bwa index is pointed at the link inside the index directory
        link_missing = not os.path.exists(linked_fasta)
        if link_missing:
            os.symlink(genome.filename, linked_fasta)

        run_index_cmd("bwa", "bwa index {}".format(linked_fasta))
def after_genome_download(self, genome, threads=1, force=False):
    """Create a minimap2 index unless one is already present.

    Nothing happens when ``minimap2`` is rejected by ``cmd_ok``. An index
    counts as present when the index directory contains a ``.mmi`` file.

    Args:
        genome: object exposing ``plugin["minimap2"]`` (with ``index_dir``
            and ``index_name``) and ``filename``.
        threads: value passed to ``minimap2 -t``.
        force: remove the index directory first so the index is rebuilt.
    """
    if not cmd_ok("minimap2"):
        return

    idx_dir = genome.plugin["minimap2"]["index_dir"]
    idx_name = genome.plugin["minimap2"]["index_name"]

    if force:
        # discard whatever was built before
        rm_rf(idx_dir)
    mkdir_p(idx_dir)

    already_built = any(entry.endswith(".mmi") for entry in os.listdir(idx_dir))
    if already_built:
        return

    run_index_cmd(
        "minimap2", f"minimap2 -t {threads} -d {idx_name} {genome.filename}"
    )
def after_genome_download(self, genome, force=False):
    """Create a bowtie2 index unless one is already present.

    Nothing happens when ``bowtie2-build`` is rejected by ``cmd_ok``. An
    index counts as present when the index directory contains a ``.bt2``
    file.

    Args:
        genome: object exposing ``props["bowtie2"]`` (with ``index_dir`` and
            ``index_name``) and ``filename``.
        force: remove the index directory first so the index is rebuilt.
    """
    if not cmd_ok("bowtie2-build"):
        return

    idx_dir = genome.props["bowtie2"]["index_dir"]
    idx_name = genome.props["bowtie2"]["index_name"]

    if force:
        # discard whatever was built before
        rmtree(idx_dir, ignore_errors=True)
    mkdir_p(idx_dir)

    already_built = any(entry.endswith(".bt2") for entry in os.listdir(idx_dir))
    if already_built:
        return

    run_index_cmd(
        "bowtie2", "bowtie2-build {} {}".format(genome.filename, idx_name)
    )
def after_genome_download(self, genome, threads=1, force=False):
    """Create a BWA index unless one is already present.

    Nothing happens when ``bwa`` is rejected by ``cmd_ok``. An index counts
    as present when the index directory contains a ``.bwt`` file. The
    ``threads`` argument is accepted but not used here.

    Args:
        genome: object exposing ``plugin["bwa"]`` (with ``index_dir`` and
            ``index_name``) and ``filename``.
        threads: unused by this method.
        force: remove the index directory first so the index is rebuilt.
    """
    if not cmd_ok("bwa"):
        return

    idx_dir = genome.plugin["bwa"]["index_dir"]
    linked_fasta = genome.plugin["bwa"]["index_name"]

    if force:
        # discard whatever was built before
        rm_rf(idx_dir)
    mkdir_p(idx_dir)

    already_built = any(entry.endswith(".bwt") for entry in os.listdir(idx_dir))
    if already_built:
        return

    # bwa index is pointed at a link to the genome inside the index directory
    link_missing = not os.path.exists(linked_fasta)
    if link_missing:
        os.symlink(genome.filename, linked_fasta)

    run_index_cmd("bwa", f"bwa index {linked_fasta}")
def after_genome_download(self, genome, force=False):
    """Create a BWA index unless one is already present.

    Nothing happens when ``bwa`` is rejected by ``cmd_ok``. An index counts
    as present when the index directory contains a ``.bwt`` file.

    Args:
        genome: object exposing ``props["bwa"]`` (with ``index_dir`` and
            ``index_name``) and ``filename``.
        force: remove the index directory first so the index is rebuilt.
    """
    if not cmd_ok("bwa"):
        return

    idx_dir = genome.props["bwa"]["index_dir"]
    linked_fasta = genome.props["bwa"]["index_name"]

    if force:
        # discard whatever was built before
        rmtree(idx_dir, ignore_errors=True)
    mkdir_p(idx_dir)

    already_built = any(entry.endswith(".bwt") for entry in os.listdir(idx_dir))
    if already_built:
        return

    # bwa index is pointed at a link to the genome inside the index directory
    link_missing = not os.path.exists(linked_fasta)
    if link_missing:
        os.symlink(genome.filename, linked_fasta)

    run_index_cmd("bwa", "bwa index {}".format(linked_fasta))
def after_genome_download(self, genome, threads=1, force=False):
    """Build a STAR index, splice-aware when a GTF annotation exists.

    Returns early when ``STAR`` is rejected by ``cmd_ok`` or when the
    ``index_name`` path already exists and ``force`` is not set. Otherwise
    the index directory is wiped, recreated and filled by
    ``STAR --runMode genomeGenerate``.

    Args:
        genome: object exposing ``plugin["star"]``, ``filename`` and
            ``annotation_gtf_file``.
        threads: value passed to STAR's ``--runThreadN``.
        force: rebuild even if the index already exists.
    """
    index_name = genome.plugin["star"]["index_name"]
    if not cmd_ok("STAR") or (os.path.exists(index_name) and not force):
        return

    index_dir = genome.plugin["star"]["index_dir"]
    rmtree(index_dir, ignore_errors=True)
    mkdir_p(index_dir)

    # gunzip genome if bgzipped and return up-to-date genome name
    fname, bgzip = gunzip_and_name(genome.filename)
    try:
        # index command
        cmd = (
            f"STAR --runMode genomeGenerate --runThreadN {threads} "
            + f"--genomeFastaFiles {fname} --genomeDir {index_dir} "
            + f"--outFileNamePrefix {index_dir}"
        )

        # if an annotation is present, generate a splice-aware index
        gtf_file = genome.annotation_gtf_file
        gzip_file = False
        if gtf_file:
            # gunzip if gzipped
            gtf_file, gzip_file = gunzip_and_name(gtf_file)

            # update index command with annotation
            cmd += f" --sjdbGTFfile {gtf_file}"
        else:
            print("\nCreating STAR index without annotation file.")

        try:
            # Create index
            run_index_cmd("star", cmd)
        finally:
            # re-gzip annotation if gunzipped, even if indexing failed
            if gtf_file:
                gzip_and_name(gtf_file, gzip_file)
    finally:
        # re-bgzip genome if gunzipped, even if indexing failed
        bgzip_and_name(fname, bgzip)
def after_genome_download(self, genome, force=False):
    """Build a GMAP index for a downloaded genome.

    Does nothing when ``gmap_build`` is rejected by ``cmd_ok`` or when the
    index directory already exists (unless ``force`` is set, in which case
    the existing index directory is removed first). A genome whose filename
    ends in ``.gz`` is gunzipped before indexing and bgzipped again
    afterwards.

    Args:
        genome: object exposing ``props["gmap"]["index_dir"]``, ``filename``
            and ``name``.
        force: when true, remove any existing index and rebuild it.

    Raises:
        subprocess.CalledProcessError: if ``gunzip`` or ``bgzip`` exits with
            a non-zero status.
    """
    if not cmd_ok("gmap_build"):
        return

    index_dir = genome.props["gmap"]["index_dir"]
    if force:
        # Start from scratch
        rmtree(index_dir, ignore_errors=True)

    if os.path.exists(index_dir):
        return

    # If the genome is bgzipped it needs to be unzipped first.
    # check_call raises on a non-zero exit status, so no return-code
    # inspection is needed.
    fname = genome.filename
    bgzip = fname.endswith(".gz")
    if bgzip:
        sp.check_call(["gunzip", fname])
        fname = fname[: -len(".gz")]

    try:
        # gmap outputs a folder named genome.name;
        # its content is moved to index_dir, consistent with other plugins
        with TemporaryDirectory() as tmpdir:
            cmd = "gmap_build -D {} -d {} {}".format(tmpdir, genome.name, fname)
            run_index_cmd("gmap", cmd)

            # Move files to index_dir
            src = os.path.join(tmpdir, genome.name)
            move(src, index_dir)
    finally:
        # Rezip the genome if it was bgzipped, even when indexing failed.
        # bgzip is distributed with tabix/htslib.
        if bgzip:
            sp.check_call(["bgzip", fname])