def step_sample_initiation(self):
    """Verify that every sample (or the project) carries usable input.

    Runs once ``sample_data`` has been populated. With ``use_fasta`` set, a
    ``fasta.nucl`` slot is required; otherwise the read slots must form
    either a complete pair or single reads, never all three together.

    Raises:
        AssertionExcept: on an invalid ``scope`` or unusable input slots.
    """
    scope = self.params["scope"]
    if scope == "project":
        sample_list = ["project_data"]
    elif scope == "sample":
        sample_list = self.sample_data["samples"]
    else:
        raise AssertionExcept(
            "'scope' must be either 'sample' or 'project'")

    paired_slots = {"fastq.F", "fastq.R"}
    read_slots = {"fastq.F", "fastq.R", "fastq.S"}

    for sample in sample_list:
        available = set(self.sample_data[sample].keys())

        if 'use_fasta' in self.params:
            # FASTA mode: the read slots are not inspected at all.
            if "fasta.nucl" not in available:
                raise AssertionExcept("No Nucleotide FASTA in: \n", sample)
            continue

        # Exactly one of forward/reverse means an incomplete pair.
        if len(paired_slots & available) == 1:
            raise AssertionExcept(
                "Sample has only forward or reverse reads. It must have either pairs or single reads\n",
                sample)
        # All three slots present means paired and single reads are mixed.
        if len(read_slots & available) == 3:
            raise AssertionExcept(
                "Kaiju is not defined for mixed paired and single reads\n",
                sample)
def step_specific_init(self):
    """Check the step parameters at initiation time.

    Good place for parameter testing; wrong place for sample data testing.

    Sets ``self.shell`` and ``self.file_tag``, requires the ``-t`` and ``-n``
    redirects, moves an optional ``-r`` redirect (list or comma-separated
    string) into ``self.levels`` and defaults ``scope`` to ``"sample"``.

    Raises:
        AssertionExcept: if ``-t`` or ``-n`` is missing from the redirects,
            or if ``-r`` is neither a string nor a list.
    """
    self.shell = "bash"  # Can be set to "bash" by inheriting instances
    self.file_tag = ".kaiju.out"

    redirects = self.params["redir_params"]

    if "-t" not in redirects:
        raise AssertionExcept(
            "Please supply Name of nodes.dmp file via '-t' argument (in redirects)"
        )
    if "-n" not in redirects:
        raise AssertionExcept(
            "Please supply Name of names.dmp file via '-n' argument (in redirects)"
        )

    if "-r" in redirects:
        requested = redirects["-r"]
        if isinstance(requested, list):
            self.levels = requested
        elif isinstance(requested, str):
            # Raw string: "\s" in a plain literal is an invalid escape
            # sequence. Stripping first keeps padding out of the level names.
            self.levels = re.split(r"\s*,\s*", requested.strip())
        else:
            raise AssertionExcept(
                "Unknown format of '-r' redirects. Must be either string or list"
            )
        # '-r' is consumed here and must not be passed on as a redirect.
        redirects.pop("-r")
    else:
        self.levels = [
            "phylum", "class", "order", "family", "genus", "species"
        ]

    if "scope" not in self.params:
        self.params["scope"] = "sample"
def step_specific_init(self):
    """Load the QIIME2 argument index and resolve plugin/method.

    ``script_path`` is expected to hold three space-separated tokens:
    the qiime executable, the plugin name and the
    method/pipeline/visualization name. They are stored in
    ``self.qiime_path``, ``self.plugin`` and ``self.method``; the matching
    entry of the argument index is stored in ``self.method_index``.

    Raises:
        AssertionExcept: if ``script_path`` has fewer than three tokens, or
            if the plugin or the method is not listed in the index.
    """
    self.shell = "bash"  # Can be set to "bash" by inheriting instances

    # Read YAML of plugin arguments; the index file sits next to this module.
    index_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "qiime2_arguments_index.yml")
    with open(index_file, "r") as fileh:
        # NOTE(review): yaml.Loader can build arbitrary Python objects. That
        # is tolerable only while this index is a file shipped with the code.
        self.qiime_args = yaml.load(fileh.read(), Loader=yaml.Loader)

    # Extract qiime path, plugin name and method from script_path.
    path_tokens = self.params["script_path"].split(" ")
    if len(path_tokens) < 3:
        raise AssertionExcept(
            "'script_path' must have the form '<qiime path> <plugin> <method>'")
    self.qiime_path, self.plugin, self.method = path_tokens[:3]

    # Check plugin and method are recognized
    if self.plugin not in self.qiime_args:
        raise AssertionExcept(
            "Plugin '{plugin}' is not one of: {plugins}!".format(
                plugin=self.plugin,
                plugins=", ".join(self.qiime_args)))
    if self.method not in self.qiime_args[self.plugin]:
        raise AssertionExcept(
            "Method '{method}' is not one of: {methods}!".format(
                method=self.method,
                methods=", ".join(self.qiime_args[self.plugin])))

    # Get argument index for method
    self.method_index = self.qiime_args[self.plugin][self.method]
def step_sample_initiation(self):
    """Resolve ``scope`` if needed and check that a nucleotide fasta exists.

    When ``scope`` is not given it is guessed: ``"sample"`` if every sample
    has a ``fasta.nucl`` slot, ``"project"`` if ``project_data`` has one.
    The project lookup uses ``sample_data["project_data"]``, the same place
    the validation below reads from.

    Raises:
        AssertionExcept: if the guess is ambiguous or impossible, or if the
            chosen scope lacks a ``fasta.nucl`` slot.
    """
    if "scope" not in self.params:
        samples = self.sample_data["samples"]
        # An empty sample list must not count as "all samples have a fasta".
        sample_has_nucl = bool(samples) and all(
            "fasta.nucl" in self.sample_data[name] for name in samples)
        project_has_nucl = (
            "fasta.nucl" in self.sample_data.get("project_data", {}))

        if sample_has_nucl and project_has_nucl:
            raise AssertionExcept(
                "Both sample and project fasta exists. You must specify 'scope'")
        elif sample_has_nucl:
            self.params["scope"] = "sample"
        elif project_has_nucl:
            self.params["scope"] = "project"
        else:
            raise AssertionExcept(
                "No fasta exists in either samples or project!")

    if self.params["scope"] == "sample":
        # Assert that all samples have nucleotide fasta files:
        for sample in self.sample_data["samples"]:
            try:
                self.sample_data[sample]["fasta.nucl"]
            except KeyError:
                raise AssertionExcept(
                    "Sample does not have a fasta file\n", sample)
    elif self.params["scope"] == "project":
        try:
            self.sample_data["project_data"]["fasta.nucl"]
        except KeyError:
            raise AssertionExcept("Project does not have a fasta file\n")
def step_specific_init(self):
    """Check the step parameters at initiation time.

    Good place for parameter testing; wrong place for sample data testing.

    Sets ``self.shell`` and ``self.file_tag``, validates ``scope`` and the
    optional ``src_scope`` (which defaults to ``scope``), and normalises
    ``type`` to a list (default ``["fastq", "fasta"]``).

    Raises:
        AssertionExcept: if ``scope`` is missing or invalid, if ``src_scope``
            is invalid, or if ``src_scope`` is ``"project"`` while ``scope``
            is ``"sample"``.
    """
    self.shell = "bash"  # Can be set to "bash" by inheriting instances
    self.file_tag = ".msh"

    valid_scopes = ("sample", "project")

    # A missing 'scope' is reported the same way as an invalid one.
    if "scope" not in self.params or self.params["scope"] not in valid_scopes:
        raise AssertionExcept("'scope' must be either 'sample' or 'project'")

    if "src_scope" in self.params:
        if self.params["src_scope"] not in valid_scopes:
            raise AssertionExcept(
                "'src_scope' must be either 'sample' or 'project'")
        if (self.params["src_scope"] == "project"
                and self.params["scope"] == "sample"):
            raise AssertionExcept(
                "Project 'src_scope' not defined for 'scope' sample.")
    else:
        self.params["src_scope"] = self.params["scope"]

    if "type" not in self.params:
        self.params["type"] = ["fastq", "fasta"]
    elif isinstance(self.params["type"], str):
        self.params["type"] = [self.params["type"]]
def step_sample_initiation(self):
    """Remove the ``type2del`` slot from every sample or from the project.

    At sample scope all samples are validated before anything is deleted,
    so a failure leaves ``sample_data`` untouched. At project scope the slot
    is removed from ``sample_data["project_data"]``; a slot found directly
    in ``sample_data`` is still honoured for backward compatibility.

    Raises:
        AssertionExcept: if ``type2del`` or ``scope`` is missing, if
            ``scope`` is invalid, or if the slot does not exist.
    """
    if "type2del" not in self.params:
        raise AssertionExcept("You must pass a 'type2del' param!")
    type2del = self.params["type2del"]

    if "scope" not in self.params:
        raise AssertionExcept("You must pass a 'scope' param!")
    scope = self.params["scope"]

    if scope == "sample":
        samples = self.sample_data["samples"]
        # Validate everything first to avoid a half-applied deletion.
        for sample in samples:
            if type2del not in self.sample_data[sample]:
                raise AssertionExcept(
                    "type %s does not exist for sample." % type2del, sample)
        for sample in samples:
            # pop() with a default tolerates a repeated sample name.
            self.sample_data[sample].pop(type2del, None)
    elif scope == "project":
        project_data = self.sample_data.get("project_data", {})
        if type2del in project_data:
            del project_data[type2del]
        elif type2del in self.sample_data:
            # Previous behaviour: slot stored directly in sample_data.
            del self.sample_data[type2del]
        else:
            raise AssertionExcept(
                "type %s does not exist for project." % type2del)
    else:
        raise AssertionExcept(
            "'scope' param must be 'sample' or 'project'")
def step_specific_init(self):
    """Check the step parameters at initiation time.

    Good place for parameter testing; wrong place for sample data testing,
    so ``self.sample_data`` is deliberately not touched here.

    Sets ``self.shell`` and ``self.file_tag``, requires a ``--db`` redirect,
    validates ``scope`` (defaulting to ``"sample"`` with a warning) and
    converts the legacy ``ktImportTaxonomy_path`` parameter into a
    ``ktImportTaxonomy`` block.

    Raises:
        AssertionExcept: if ``--db`` is missing or ``scope`` is invalid.
    """
    self.shell = "bash"  # Can be set to "bash" by inheriting instances
    self.file_tag = ".kraken.out"

    # Checking this once and then applying to each sample:
    if "--db" not in self.params["redir_params"]:
        raise AssertionExcept("--db not set.\n")

    if "scope" in self.params:
        if self.params["scope"] not in ("project", "sample"):
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'")
    else:
        self.write_warning("No 'scope' specified. Using 'sample' scope")
        self.params["scope"] = "sample"

    # For backwards compatibility:
    if "ktImportTaxonomy_path" in self.params:
        self.params["ktImportTaxonomy"] = {
            "path": self.params["ktImportTaxonomy_path"],
            "redirects": "",
        }
def step_sample_initiation(self):
    """Settle ``scope`` and make sure sample-scope input is available.

    Without an explicit ``scope`` the step uses ``"project"`` when the
    project holds a ``fasta.nucl`` slot and ``"sample"`` otherwise. At
    sample scope every sample must hold a ``fasta.nucl`` slot.

    Raises:
        AssertionExcept: on an invalid ``scope`` or a sample without a
            nucleotide fasta.
    """

    def has_nucl_fasta(slot_owner):
        # A missing owner and a missing slot both surface as KeyError.
        try:
            self.sample_data[slot_owner]["fasta.nucl"]
        except KeyError:
            return False
        return True

    if "scope" in self.params:
        # Check scope is legitimate
        if self.params["scope"] not in ["project", "sample"]:
            raise AssertionExcept(
                "Scope must be either 'sample' or 'project'\n")
    elif has_nucl_fasta("project_data"):
        # Guessing: a project-level nucl fasta wins.
        self.params["scope"] = "project"
    else:
        self.params["scope"] = "sample"

    if self.params["scope"] != "sample":
        return

    for sample in self.sample_data["samples"]:
        if not has_nucl_fasta(sample):
            raise AssertionExcept(
                "Sample does not have a nucl fasta defined. Can't build index\n",
                sample)
def step_sample_initiation(self):
    """Check that a fasta matching ``self.dbtype`` exists for each target.

    The targets are ``project_data`` at project scope or every sample at
    sample scope; each must hold a ``fasta.<dbtype>`` slot.

    Raises:
        AssertionExcept: if ``scope`` is missing or invalid, or if the
            required fasta slot is absent.
    """
    if "scope" not in self.params:
        raise AssertionExcept("No 'scope' specified.")

    scope = self.params["scope"]
    if scope == "project":
        sample_list = ["project_data"]
    elif scope == "sample":
        sample_list = self.sample_data["samples"]
    else:
        raise AssertionExcept(
            "'scope' must be either 'sample' or 'project'")

    for sample in sample_list:
        # Make sure a file exists in the sample equivalent to dbtype.
        # (Since version 1.0.2 the slots are named fasta.nucl / fasta.prot.)
        required_slot = "fasta." + self.dbtype
        try:
            self.sample_data[sample][required_slot]
        except KeyError:
            raise AssertionExcept(
                "No file exists in sample for specified -dbtype (%s)\n" %
                self.dbtype, sample)
def step_sample_initiation(self):
    """Validate ``scope`` against the presence of an external ``reference``.

    With a ``reference`` parameter the scope ends up as ``"project"``
    unless it was explicitly ``"sample"``, which only triggers a warning.
    Without one, ``scope`` is mandatory and the matching ``fasta.nucl``
    slot(s) must exist.

    Raises:
        AssertionExcept: if no usable ``scope`` is given without a
            reference, or a required ``fasta.nucl`` slot is missing.
    """
    scope = self.params["scope"] if "scope" in self.params else None

    if "reference" in self.params:
        if scope == "sample":
            self.write_warning(
                "It makes no sense to define a sample-scope external reference!"
            )
        else:
            # Missing, 'project' and unrecognised values all become 'project'.
            self.params["scope"] = "project"
        return

    if scope == "sample":
        for sample in self.sample_data["samples"]:
            if "fasta.nucl" not in self.sample_data[sample]:
                raise AssertionExcept(
                    "No fasta.nucl defined for sample", sample)
    elif scope == "project":
        if "fasta.nucl" not in self.sample_data["project_data"]:
            raise AssertionExcept("No fasta.nucl defined for project")
    else:
        # Same message whether the scope is absent or unrecognised.
        raise AssertionExcept(
            "Please supply a scope parameter: either 'sample' or 'project'!"
        )
def step_specific_init(self):
    """Check and tidy the step parameters at initiation time.

    Requires ``scope``, drops redirects that the step sets by itself (with
    a warning for each), renames a ``-m`` redirect to ``--mode`` and insists
    that a mode is given.

    Raises:
        AssertionExcept: if ``scope`` or the mode is missing.
    """
    self.shell = "bash"  # Can be set to "bash" by inheriting instances
    self.file_tag = "BUSCO"

    if "scope" not in self.params:
        raise AssertionExcept(
            "Please specify a 'scope': Either 'sample' or 'project'.")

    redirects = self.params["redir_params"]

    # These are filled in automatically when the scripts are built.
    for auto_flag in ("-i", "--in", "-o", "--out", "-t", "--tmp"):
        if auto_flag not in redirects:
            continue
        del redirects[auto_flag]
        self.write_warning(
            "You are not supposed to specify %s in redirects. We set it automatically"
            % auto_flag)

    # Transferring redirected "-m" into "--mode"
    if "-m" in redirects:
        redirects["--mode"] = redirects.pop("-m")

    # --mode later selects between fasta.prot and fasta.nucl.
    if not ("--mode" in redirects or "-m" in redirects):
        raise AssertionExcept(
            " You must specify a 'mode': "
            "- geno or genome, for genome assemblies (DNA) "
            "- tran or transcriptome, for transcriptome assemblies (DNA) "
            "- prot or proteins, for annotated gene sets (protein)\n\n")
def step_sample_initiation(self):
    """Check that an index can be built for each target.

    Each target (``project_data`` or every sample, depending on ``scope``)
    must hold a ``fasta.nucl`` slot and must not already hold a
    ``bowtie2.index`` slot.

    Raises:
        AssertionExcept: on an invalid ``scope``, a missing fasta, or an
            index that already exists.
    """
    scope = self.params["scope"]
    if scope == "sample":
        sample_list = self.sample_data["samples"]
    elif scope == "project":
        sample_list = ["project_data"]
    else:
        raise AssertionExcept("'scope' must be either 'sample' or 'project'")

    for sample in sample_list:
        try:
            slots = self.sample_data[sample]
            slots["fasta.nucl"]
        except KeyError:
            raise AssertionExcept(
                "No 'fasta.nucl' defined. Can't build index\n", sample)

        if "bowtie2.index" in slots:
            raise AssertionExcept(
                "bowtie2 index already exists for sample.\n", sample)
def set_bed(self, action_numbered, sample):
    """Return the BED file to use for one action, or ``""`` if none is set.

    The action's parameter block may carry a ``bed`` entry that is either
    ``"sample"`` (use the sample's ``bed`` slot), ``"project"`` (use the
    project's ``bed`` slot) or any other non-empty value, which is returned
    unchanged.

    Raises:
        AssertionExcept: if the referenced ``bed`` slot does not exist, or
            if the ``bed`` entry is empty.
    """
    action_params = self.params[action_numbered]

    # Nothing to do unless the block exists, is a dictionary and has a 'bed'.
    if not (action_params and isinstance(action_params, dict)
            and "bed" in action_params):
        return ""

    bed_source = action_params["bed"]

    if bed_source == "sample":
        if "bed" not in self.sample_data[sample]:
            raise AssertionExcept(
                "No 'bed' defined for sample in '{action}'".format(
                    action=action_numbered), sample)
        return self.sample_data[sample]["bed"]

    if bed_source == "project":
        if "bed" not in self.sample_data["project_data"]:
            raise AssertionExcept(
                "No 'bed' defined for project in '{action}'".format(
                    action=action_numbered))
        return self.sample_data["project_data"]["bed"]

    if not bed_source:
        raise AssertionExcept(
            "Value for 'bed' in {action} must be 'sample', 'project' or a path"
            .format(action=action_numbered))

    # Anything else is returned as given.
    return bed_source
def step_sample_initiation(self):
    """Derive ``self.type`` from ``--mode`` and check the matching fasta.

    Dependency output is tested here because it only exists once
    ``sample_data`` has been updated. Genome and transcriptome modes map to
    ``"nucl"``, protein modes to ``"prot"``; the target(s) selected by
    ``scope`` must then hold a ``fasta.<type>`` slot.

    Raises:
        AssertionExcept: on an unknown mode, an invalid ``scope`` or a
            missing fasta slot.
    """
    mode = self.params["redir_params"]["--mode"]
    if mode in ('geno', 'genome', 'tran', 'transcriptome'):
        self.type = "nucl"
    elif mode in ('prot', 'proteins'):
        self.type = "prot"
    else:
        raise AssertionExcept(
            "The value you passed to --mode ({mode}) is not a valid value".
            format(mode=self.params["redir_params"]["--mode"]))

    fasta_slot = "fasta.%s" % self.type
    scope = self.params["scope"]

    if scope == "project":
        if fasta_slot not in self.sample_data["project_data"]:
            raise AssertionExcept(
                "It seems there is no project-wide %s fasta file." %
                self.type)
    elif scope == "sample":
        for sample in self.sample_data["samples"]:
            if fasta_slot not in self.sample_data[sample]:
                raise AssertionExcept(
                    "It seems there is no sample-wide %s fasta file." %
                    self.type, sample)
    else:
        raise AssertionExcept(
            "'scope' must be either 'sample' or 'project'.")
def step_sample_initiation(self):
    """Check that a bwa index can be built for the chosen scope.

    The target (``project_data`` or every sample) must hold a
    ``fasta.nucl`` slot and must not already hold a ``bwa_index`` slot.
    At project scope the index is looked for in ``project_data`` as well
    as directly in ``sample_data``, which is where the previous code
    looked.

    Raises:
        AssertionExcept: on an invalid ``scope``, a missing fasta, or an
            index that already exists.
    """
    scope = self.params["scope"]

    if scope == "project":
        try:
            project_data = self.sample_data["project_data"]
            project_data["fasta.nucl"]
        except KeyError:
            # No sample name exists at project scope, so none is passed.
            raise AssertionExcept(
                "Project does not have a nucl fasta defined. Check your 'scope'\n"
            )
        if "bwa_index" in project_data or "bwa_index" in self.sample_data:
            raise AssertionExcept("bwa index already seems to exist.\n")
    elif scope == "sample":
        for sample in self.sample_data["samples"]:
            try:
                slots = self.sample_data[sample]
                slots["fasta.nucl"]
            except KeyError:
                raise AssertionExcept(
                    "Sample does not have a nucl fasta defined. Can't build index\n",
                    sample)
            if "bwa_index" in slots:
                raise AssertionExcept(
                    "bwa index already exists for sample.\n", sample)
    else:
        raise AssertionExcept("Scope must be either 'sample' or 'project'")
def step_sample_initiation(self):
    """Require the project-level inputs of this step.

    ``project_data`` must hold both a ``fasta.nucl`` and an
    ``isoform.raw_counts`` slot.

    Raises:
        AssertionExcept: naming the first slot found missing.
    """
    project_slots = self.sample_data["project_data"]
    if 'fasta.nucl' not in project_slots:
        raise AssertionExcept(
            "No Project level FASTA Nucleotide File was Found")

    project_slots = self.sample_data["project_data"]
    if 'isoform.raw_counts' not in project_slots:
        raise AssertionExcept(
            "No Project level Isoform Raw Counts File was Found")
def step_specific_init(self):
    """Split ``script_path`` into executable and ``mod``.

    ``script_path`` may carry the mod as a second whitespace-separated
    token (e.g. ``/path/to/bwa mem``). The path is reduced to its first
    token and the mod, if any, is stored in ``self.params["mod"]``.

    Raises:
        AssertionExcept: if the mod is given both as a parameter and in
            ``script_path``, or not given at all.
    """
    self.shell = "bash"  # Can be set to "bash" by inheriting instances

    # str.split() with no argument splits on runs of whitespace and ignores
    # leading/trailing whitespace, so a padded path is not mistaken for an
    # empty executable followed by a mod.
    tokens = self.params["script_path"].split()
    mod = tokens[1] if len(tokens) > 1 else None

    # Reduce script_path to path only. The mod is treated separately.
    self.params["script_path"] = tokens[0] if tokens else ""

    # Make sure mod is defined only once, and if passed through script_path,
    # add it to params.
    if "mod" in self.params:
        if mod:
            raise AssertionExcept(
                "You supplied mod as parameter as well as in script path.")
    elif mod:
        self.params["mod"] = mod
    else:
        raise AssertionExcept(
            "You must supply a 'mod' parameter or add the mod to the end of the script path.\n\te.g. /path/to/bwa mem"
        )
def step_sample_initiation(self):
    """Check read files and ``scope`` once ``sample_data`` is available.

    Dependency output is tested here because it only exists after
    ``sample_data`` has been updated. Every sample must hold at least one
    of the ``fastq.F``, ``fastq.R`` and ``fastq.S`` slots, and ``scope``
    must be ``"sample"`` or ``"project"``.

    Raises:
        AssertionExcept: for a sample without reads, or a missing or
            invalid ``scope``.
    """
    read_slots = {"fastq.F", "fastq.R", "fastq.S"}

    # Assert that all samples have reads files:
    for sample in self.sample_data["samples"]:
        if read_slots.isdisjoint(self.sample_data[sample].keys()):
            raise AssertionExcept("No read files\n", sample)

    if "scope" not in self.params:
        raise AssertionExcept("No 'scope' specified.")

    if self.params["scope"] not in ("project", "sample"):
        raise AssertionExcept(
            "'scope' must be either 'sample' or 'project'")
def step_specific_init(self):
    """Check the step parameters and normalise the ``blast_merge`` block.

    Called on initiation: a good place for parameter testing, the wrong
    place for sample data testing.

    Sets ``self.shell`` and ``self.file_tag``. When a ``blast_merge`` block
    is given, a ``path`` of ``None`` is derived from ``script_path`` by
    replacing ``parse_blast`` with ``compare_blast_parsed_reports``, and
    ``redirects`` is converted to a string and wrapped for insertion into a
    script.

    Raises:
        AssertionExcept: if the old ``blast_merge_path`` parameter is used,
            or if ``blast_merge`` lacks a ``path`` or ``redirects`` key.
    """
    self.shell = "bash"  # Can be set to "bash" by inheriting instances
    self.file_tag = ".blast.parsed"
    # The flat 'blast_merge_path' parameter is no longer accepted.
    if "blast_merge_path" in self.params:
        raise AssertionExcept(
            "Please convert 'blast_merge_path' into the new path/redirects format!"
        )
    if "blast_merge" in self.params:
        try:
            # Testing existence of "path" and, if empty, extracting from main script_path
            if self.params["blast_merge"]["path"] is None:
                self.params["blast_merge"]["path"] = re.sub(
                    pattern="parse_blast",
                    repl="compare_blast_parsed_reports",
                    string=self.params["script_path"])
            # Testing existence and stringifying redirects:
            # a non-string value is joined as "key value" pairs, one per
            # continuation line; an empty value contributes only its key.
            if not isinstance(self.params["blast_merge"]["redirects"], str):
                self.params["blast_merge"]["redirects"] = " \\\n\t".join([
                    key + " " + (val if val else "") for key, val in
                    self.params["blast_merge"]["redirects"].items()
                ])
            # Wrap with a leading newline/tab and a trailing line continuation.
            self.params["blast_merge"][
                "redirects"] = "\n\t{redirs} \\".format(
                    redirs=self.params["blast_merge"]["redirects"])
        except KeyError:
            # Either "path" or "redirects" is absent from the block.
            raise AssertionExcept(
                "Please add path and redirects to `blast_merge` block")
def step_sample_initiation(self):
    """Check the inputs of the step once ``sample_data`` is available.

    Dependency output is tested here because it only exists after
    ``sample_data`` has been updated. Each target selected by ``scope``
    needs a ``fasta.nucl`` slot. For a 'Predict' run (a ``Predict``
    parameter, or 'Predict' appearing in ``script_path``) it also needs a
    ``transdecoder.dir`` slot.

    Raises:
        AssertionExcept: on an invalid ``scope`` or a missing slot.
    """
    scope = self.params["scope"]
    if scope == "sample":
        sample_list = self.sample_data["samples"]
    elif scope == "project":
        sample_list = ["project_data"]
    else:
        raise AssertionExcept(
            "'scope' must be either 'sample' or 'project'")

    for sample in sample_list:
        slots = self.sample_data[sample]
        if "fasta.nucl" not in slots:
            raise AssertionExcept("No 'fasta.nucl' defined!", sample)

        is_predict = "Predict" in self.params or re.search(
            "Predict", self.params["script_path"])
        if not is_predict:
            continue

        # Predict reuses the directory left by the previous run.
        if "transdecoder.dir" not in self.sample_data[sample]:
            raise AssertionExcept(
                "Please include a 'LongOrf' TransDecoder step before the 'Predict' step.\n"
                "You can include 'blatsp' and 'hmmscan' steps in between to make it effective"
            )
def step_sample_initiation(self):
    """Check that BLAST results exist for the chosen scope.

    The project, or every sample, must hold at least one of the
    ``blast.nucl`` and ``blast.prot`` slots.

    Raises:
        AssertionExcept: if ``scope`` is missing or invalid, or if no BLAST
            result slot is found.
    """
    if "scope" not in self.params:
        raise AssertionExcept("No 'scope' specified.")

    def lacks_blast(slots):
        return not ("blast.nucl" in slots or "blast.prot" in slots)

    scope = self.params["scope"]
    if scope == "project":
        if lacks_blast(self.sample_data["project_data"]):
            raise AssertionExcept("There are no project BLAST results.\n")
    elif scope == "sample":
        # Checking all samples have a 'blast' file-type in sample_data
        for sample in self.sample_data["samples"]:
            if lacks_blast(self.sample_data[sample]):
                raise AssertionExcept("There are no BLAST results.\n",
                                      sample)
    else:
        raise AssertionExcept(
            "'scope' must be either 'sample' or 'project'")
def step_sample_initiation(self):
    """Check that the variants file named by ``input`` exists.

    ``input`` values ``"vcf"`` and ``"bcf"`` require the slot of the same
    name; any other value requires a ``gzVCF`` slot. The check runs over
    the project or over every sample, depending on ``scope``.

    Raises:
        AssertionExcept: on an invalid ``scope`` or a missing variants slot.
    """
    scope = self.params["scope"]
    if scope == "sample":
        sample_list = self.sample_data["samples"]
    elif scope == "project":
        sample_list = ["project_data"]
    else:
        raise AssertionExcept(
            "'scope' must be either 'sample' or 'project'")

    missing_message = {
        "vcf": "No VCF variants file.",
        "bcf": "No BCF variants file.",
        "gzVCF": "No 'gzVCF' (gzipped VCF) variants file.",
    }

    for sample in sample_list:
        input_kind = self.params["input"]
        # Anything that is not 'vcf' or 'bcf' is treated as gzipped VCF.
        slot = input_kind if input_kind in ("vcf", "bcf") else "gzVCF"
        try:
            self.sample_data[sample][slot]
        except KeyError:
            raise AssertionExcept(missing_message[slot], sample)
def step_sample_initiation(self):
    """Check that both a nucleotide fasta and a GTF exist.

    Dependency output is tested here because it only exists after
    ``sample_data`` has been updated. The project, or every sample, must
    hold both the ``fasta.nucl`` and the ``gtf`` slot.

    Raises:
        AssertionExcept: if ``scope`` is missing or invalid, or if either
            slot is absent.
    """
    if "scope" not in self.params:
        raise AssertionExcept("No 'scope' specified.")

    def has_both(slots):
        return "fasta.nucl" in slots and "gtf" in slots

    scope = self.params["scope"]
    if scope == "project":
        if not has_both(self.sample_data["project_data"]):
            raise AssertionExcept(
                "Project does not have fasta.nucl and gtf files.")
    elif scope == "sample":
        for sample in self.sample_data["samples"]:
            if not has_both(self.sample_data[sample]):
                raise AssertionExcept(
                    "Sample does not have fasta.nucl and gtf files.",
                    sample)
    else:
        raise AssertionExcept(
            "'scope' must be either 'sample' or 'project'")
def step_specific_init(self):
    """Check the mandatory parameters of the step.

    ``type`` must be ``nucl`` or ``prot``, ``output_type`` must be one of
    ``tblout``, ``domtblout`` and ``pfamtblout``, and ``hmmdb`` must be
    given.

    Raises:
        AssertionExcept: for the first parameter that is missing or invalid.
    """
    self.shell = "bash"  # Can be set to "bash" by inheriting instances

    # Shared tail of the two 'output_type' error messages.
    output_type_options = (
        "* tblout : save parseable table of per-sequence hits "
        "* domtblout : save parseable table of per-domain hits "
        "* pfamtblout : save table of hits and domains in Pfam format ")

    params = self.params

    if "type" not in params:
        raise AssertionExcept(
            "Please specify the fasta type to use: type = nucl or prot")
    if params["type"] not in ("nucl", "prot"):
        raise AssertionExcept("'type' must be 'nucl' or 'prot'")

    if "output_type" not in params:
        raise AssertionExcept(
            " Please specify the output_type to use: " + output_type_options)
    if params["output_type"] not in ("tblout", "domtblout", "pfamtblout"):
        raise AssertionExcept(
            " 'output_type' must be one of the following: " +
            output_type_options)

    if "hmmdb" not in params:
        raise AssertionExcept("Please specify the hmmdb to use!")
def step_sample_initiation(self):
    """Check history and inputs once ``sample_data`` is available.

    Dependency output is tested here because it only exists after
    ``sample_data`` has been updated. A warning is written when no step in
    the dependency list maps to ``trinity``; after that the project, or
    every sample, must hold a ``fasta.nucl`` slot.

    Raises:
        AssertionExcept: if ``scope`` is missing or invalid, or if the
            nucleotide fasta is absent.
    """
    upstream_modules = [
        self.pipe_data["names_index"][step]
        for step in self.get_depend_list()
    ]
    if "trinity" not in upstream_modules:
        self.write_warning(
            "No trinity in history. Are you sure of what you are attempting to do?"
        )

    if "scope" not in self.params:
        raise AssertionExcept("No 'scope' specified.")

    scope = self.params["scope"]
    if scope == "sample":
        for sample in self.sample_data["samples"]:
            if "fasta.nucl" not in self.sample_data[sample]:
                raise AssertionExcept("No fasta file of type 'nucl'\n",
                                      sample)
    elif scope == "project":
        if "fasta.nucl" not in self.sample_data["project_data"]:
            raise AssertionExcept(
                "No fasta file of type 'nucl' in project\n")
    else:
        raise AssertionExcept(
            "'scope' must be either 'sample' or 'project'")
def step_sample_initiation(self):
    """Validate, or guess, ``scope`` from the available assemblies.

    With an explicit ``scope`` the matching ``fasta.nucl`` slot(s) must
    exist; at project scope a ``compare_mode`` parameter only triggers a
    warning. Without a ``scope`` a project-wide assembly is preferred,
    otherwise every sample must have its own assembly.

    Raises:
        AssertionExcept: on an invalid ``scope`` or a missing assembly.
    """

    def has_assembly(slot_owner):
        # A missing owner and a missing slot both surface as KeyError.
        try:
            self.sample_data[slot_owner]["fasta.nucl"]
        except KeyError:
            return False
        return True

    def require_sample_assemblies():
        # Make sure each sample has a ["fasta.nucl"] slot
        for sample in self.sample_data["samples"]:
            if not has_assembly(sample):
                raise AssertionExcept(
                    "You are trying to run QUAST with no assembly.\n",
                    sample)

    if "scope" not in self.params:
        self.write_warning("'scope' not passed. Will try guessing...")
        # Is there a mega-assembly?
        if has_assembly("project_data"):
            self.write_warning(
                "There is a project-wide assembly. Using it.\n")
            self.params["scope"] = "project"
        else:
            # No. Check if all samples have assemblies:
            require_sample_assemblies()
            self.params["scope"] = "sample"
        return

    scope = self.params["scope"]
    if scope == "project":
        if not has_assembly("project_data"):
            raise AssertionExcept("No project wide assembly!")
        if "compare_mode" in self.params:
            self.write_warning(
                "Ignoring 'compare_mode' in project scope")
    elif scope == "sample":
        require_sample_assemblies()
    else:
        raise AssertionExcept(
            "'scope' must be either 'project' or 'sample'")
def step_sample_initiation(self):
    """Check that all input files of the step exist for the chosen scope.

    Dependency output is tested here because it only exists after
    ``sample_data`` has been updated. The project, or every sample, must
    hold the ``fasta.nucl``, ``fasta.prot``, ``hmmscan.prot``,
    ``gene_trans_map`` and ``transcripts.fasta.nucl`` slots. Sample-scope
    errors name the sample rather than the project.

    Raises:
        AssertionExcept: if ``scope`` is missing or invalid, or if a
            required slot is absent.
    """
    if "scope" not in self.params:
        raise AssertionExcept("No 'scope' specified.")

    # (slot, tail of the error message), in the order they are checked.
    required_slots = (
        ("fasta.nucl", "a nucl fasta."),
        ("fasta.prot", "a prot fasta."),
        ("hmmscan.prot", "a prot hmmscan output file."),
        ("gene_trans_map", "a gene_trans_map file."),
        ("transcripts.fasta.nucl", "a transcripts.fasta.nucl file."),
    )

    scope = self.params["scope"]
    if scope == "project":
        for slot, description in required_slots:
            if slot not in self.sample_data["project_data"]:
                raise AssertionExcept(
                    "Project does not have %s" % description)
    elif scope == "sample":
        for sample in self.sample_data["samples"]:
            for slot, description in required_slots:
                if slot not in self.sample_data[sample]:
                    raise AssertionExcept(
                        "Sample does not have %s" % description, sample)
    else:
        raise AssertionExcept(
            "'scope' must be either 'sample' or 'project'")
def step_specific_init(self):
    """Check the step parameters at initiation time.

    Good place for parameter testing; wrong place for sample data testing.

    Sets ``self.shell`` and ``self.file_tag``, forces ``--input_type`` to
    ``fastq``, warns about redirects whose values are ignored, normalises
    the optional ``merge_metaphlan_tables`` block and requires a ``path``
    in the optional ``ktImportText`` block.

    When ``merge_metaphlan_tables`` has no ``path``, the default is
    ``utils/merge_metaphlan_tables.py`` inside the directory that holds
    ``script_path``.

    Raises:
        AssertionExcept: if ``merge_metaphlan_tables`` is a non-empty
            non-dictionary value, or if ``ktImportText`` lacks a ``path``.
    """
    self.shell = "bash"  # Can be set to "bash" by inheriting instances
    self.file_tag = ".metaphlan.out"

    redirects = self.params["redir_params"]

    if "--input_type" in redirects:
        self.write_warning(
            "At the moment metaphlan supports only --input_type fastq. Ignoring the value you passed\n"
        )
    redirects["--input_type"] = "fastq"

    if "--bowtie2out" in redirects and redirects["--bowtie2out"]:
        self.write_warning(
            "Ignoring the value you passed for --bowtie2out.\nWill store data in sample specific location\n"
        )
    if "--biom" in redirects and redirects["--biom"]:
        self.write_warning(
            "Ignoring the value you passed for --biom.\nWill store data in sample specific location\n"
        )

    if "merge_metaphlan_tables" in self.params:
        if not isinstance(self.params["merge_metaphlan_tables"], dict):
            if self.params["merge_metaphlan_tables"]:
                raise AssertionExcept(
                    "'merge_metaphlan_tables' must be empty or a block with 'path' and optionally 'redirects'"
                )
            # An empty value stands for an empty block.
            self.params["merge_metaphlan_tables"] = dict()

        merge_block = self.params["merge_metaphlan_tables"]
        if "path" not in merge_block:
            # dirname, not basename: the helper is looked up relative to the
            # directory of the main script, not under the script's file name.
            merge_block["path"] = os.path.join(
                os.path.dirname(self.params["script_path"]),
                "utils", "merge_metaphlan_tables.py")
            self.write_warning(
                "You did not provided a path in 'merge_metaphlan_tables'. Using '{path}'"
                .format(path=merge_block["path"]))

    if "ktImportText" in self.params:
        if not isinstance(self.params["ktImportText"], dict) \
                or "path" not in self.params["ktImportText"]:
            raise AssertionExcept(
                "Please include a 'path' in the 'ktImportText' block.")
    else:
        self.write_warning(
            "You did not supply a 'ktImportText' block. Will not create krona reports...\n"
        )
def build_scripts(self):
    """Build one script per target (project, or each sample).

    For every target the script changes into the working directory, adds
    the constant part returned by ``get_script_const()`` and appends the
    ``--out``, ``--in`` and ``--tmp`` arguments, plus ``--lineage`` from the
    project's ``BUSCO.lineage`` slot when it is not given in the redirects.
    The expected result directory is stored in the target's ``BUSCO`` slot.

    Raises:
        AssertionExcept: on an invalid ``scope``, or when no lineage is
            available from either source.
    """
    # Either the samples, or the single pseudo-sample "project_data".
    scope = self.params["scope"]
    if scope == "sample":
        sample_list = self.sample_data["samples"]
    elif scope == "project":
        sample_list = ["project_data"]
    else:
        raise AssertionExcept(
            "'scope' must be either 'sample' or 'project'")

    for sample in sample_list:
        # Name of specific script:
        self.spec_script_name = self.set_spec_script_name(sample)
        self.script = ""

        # Make a dir for the current sample:
        sample_dir = self.make_folder_for_sample(sample)

        # Must precede every new script: it sees to local issues and
        # returns the directory to use as base_dir for this step.
        use_dir = self.local_start(sample_dir)

        self.script += "# Moving into output location\n" + (
            "cd %s \n\n" % use_dir)
        self.script += self.get_script_const()

        # The results will be put in data/step_name/name/Title
        input_fasta = self.sample_data[sample]["fasta.%s" % self.type]
        self.script += "".join([
            "--out %s \\\n\t" % sample,
            "--in %s \\\n\t" % input_fasta,
            "--tmp %s \\\n\t" % os.path.join(use_dir, "tmp"),
        ])

        if "--lineage" not in self.params["redir_params"]:
            if "BUSCO.lineage" not in self.sample_data["project_data"]:
                raise AssertionExcept(
                    "Please supply a lineage, either via redirects (--lineage) or via 'get_lineage'"
                )
            self.script += "--lineage %s \\\n\t" % self.sample_data[
                "project_data"]["BUSCO.lineage"]

        # Drop the trailing line continuation.
        self.script = self.script.rstrip("\\\n\t")

        # Store the expected result directory in the BUSCO slot:
        self.sample_data[sample]["BUSCO"] = os.path.join(
            sample_dir, "run_%s" % sample)

        # Move all files from temporary local dir to permanent base_dir
        self.local_finish(use_dir, sample_dir)

        self.create_low_level_script()
def get_action_output_type(self, sample, action, redirects):
    """Return the file type produced by a samtools action.

    An explicit output format in the redirects (an option ending in ``O``
    followed by a format name) takes precedence and must be ``bam``,
    ``sam`` or ``cram``. Otherwise the type is derived from the action and,
    for ``view``, ``mpileup`` and ``index``, from the redirect flags or
    ``self.active_type``.

    Args:
        sample: sample name, used only when reporting an error.
        action: name of the samtools action.
        redirects: redirects as a single string, or ``None``.

    Returns:
        The output type as a string, or ``None`` for an action that is not
        handled here.

    Raises:
        AssertionExcept: for an unsupported explicit output format, or for
            ``index`` when the active type is neither ``bam`` nor ``cram``.
    """
    if redirects is not None:
        # One search with a capture group. When the option is not followed
        # by a format name there is no match and the action-based rules
        # below apply, instead of failing on a missing match object.
        explicit_format = re.search(r"-\w*O\s+(\w+)", redirects)
        if explicit_format:
            requested = explicit_format.group(1)
            if requested.lower() not in ("bam", "sam", "cram"):
                raise AssertionExcept(
                    "Bad value for output format ({type})".format(
                        type=requested))
            return requested.lower()

    if action == "view":
        if not redirects:
            return "sam"
        if re.search(r"-\w*b", redirects):
            return "bam"
        if re.search(r"-\w*C", redirects):
            return "cram"
        return "sam"

    if action == "index":
        if self.active_type == "bam":
            return "bai"
        if self.active_type == "cram":
            return "crai"
        raise AssertionExcept(
            "No 'bam' or 'cram' for 'samtools index'", sample)

    if action in ("flagstat", "stats", "idxstats", "depth", "bedcov"):
        # These report types are named after the action itself.
        return action

    if action in ("fasta", "fastq"):
        return self.active_type

    if action == "mpileup":
        if not redirects:
            return "mpileup"
        if re.search(r"-\w*v", redirects) or re.search(r"--VCF", redirects):
            return "vcf"
        if re.search(r"-\w*g", redirects) or re.search(r"--BCF", redirects):
            return "bcf"
        return "mpileup"

    if action in ("sort", "merge", "addreplacerg", "fixmate", "markdup"):
        return "bam"

    return None