def wedring_indexbuilder_parse_params(self): """This method sets **Wedring**'s index builder specific parameters.""" if self._i is None: self._ib = True self._indexbldr = IndexBuilder(out_dir=self._id, ref_seq=self._r, index_label=self._il, cfg_file=self._c, quiet=self._qt)
class Wedring(object): """This class is the the manager of the **Wedring** pipeline. It manages all the parameters and all the steps of the pipeline. """ def __init__(self, arguments=None): """:class:Wedring constructor. :param arguments: List of arguments, usually command line ones. :type arguments: list """ self._wedr_list = [] self._num_threads = 1 self._qt = False # Quietly? self._o = "./wedr_out" # Output directory. self._id = "./wedr_index" # Bowtie's BW index output directory. self._m = "bowtie" # Mapper. self._r = None # Reference sequence. self._i = None # Bowtie's EBWT index. self._l = [None] # Single-end library. self._1 = [None] # Pair-end library mate 1. self._2 = [None] # Pair-end library mate 2. self._q = [None] # Single-end library quality. self._q1 = [None] # Pair-end library mate 1 quality. self._q2 = [None] # Pair-end library mate 2 quality. self._cf = [None] # Coverage files. self._a = None # Annotation file. self._c = None # Configuration file. self._ml = [None] # Mapping label. self._il = None # Index label. self._cnd = None # Experiment conditions. self._cnt_table = None # Name of the counts table. self._ib = False # Build Bowtie's BW index? self._wb = TTL_PIPELINE # Wedring's barrier. self._indexbldr = None # Wedring's IndexBuilder instance. self._ld = None # Directory where log files are saved. self.wedring_parse_args(arguments) def wedring_parse_args(self, arguments): """This method parses the :class:Wedring arguments and make some adjustments. :param arguments: List of arguments, usually command line ones. :type arguments: list :raises: :class:WedringError """ try: if len(arguments) < 1: raise WedringError(133, "Insufficient number of arguments.") except TypeError: raise WedringError(133, "Arguments were not provided.") bin_path = None # path to add to system path group_all = False if "--group-all" in arguments: group_all = True arguments.remove("--group-all") # Lambda function to adjust parameters according to the --group-all # argument value. adjust_param = lambda params: ([param for param in params.split(',') if param != ''] if not group_all else [param]) # Adjusting the value of the Wedring barrier to define wich pipeline # stage will be executed if "--just-indexbuild" in arguments: self._wb = JUST_INDEX arguments.remove("--just-indexbuild") if "--just-map" in arguments: self._wb = JUST_MAP arguments.remove("--just-map") if "--just-counttable" in arguments: self._wb = JUST_TABLE arguments.remove("--just-counttable") if "--just-de" in arguments: self._wb = JUST_DE arguments.remove("--just-de") try: opts = getopt(arguments, "n:o:x:m:r:i:l:1:2:q:a:g:t:c:t:d:p:", ["num-threads=", "out-dir=", "index-dir=", "quiet", "mapper=", "ref-sequence=", "bw-index=", "lib-file=", "pair-mate-1=", "pair-mate-2=", "quals=", "q1=", "q2=", "annot-file=", "coverage-files=", "config-file=", "count-table=", "map-label=", "index-label=", "conditions=", "path="]) if opts[1] != []: raise WedringError(132, "Argument list not supported: %s." % " ".join(opts[1])) for opt, val in opts[0]: if opt in ("-n", "--num-threads"): self._num_threads = int(val) elif opt in ("--quiet"): self._qt = True elif opt in ("-m", "--mapper"): self._m = val elif opt in ("-r", "--ref-sequence"): self._r = val elif opt in ("-i", "--bw-index"): self._i = val elif opt in ("-a", "--annot-file"): self._a = val elif opt in ("-c", "--config-file"): self._c = val elif opt in ("-t", "--count-table"): self._cnt_table = val elif opt == "--index-label": self._il = val elif opt in ("-d", "--conditions"): self._cnd = val elif opt in ("-x", "--index-dir"): self._id = val elif opt in ("-o", "--out-dir"): self._o = val elif opt in ("-l", "--lib-file"): self._l = adjust_param(val) elif opt in ("-1", "--pair-mate-1"): self._1 = adjust_param(val) elif opt in ("-2", "--pair-mate-2"): self._2 = adjust_param(val) elif opt in ("-q", "--quals"): self._q = adjust_param(val) elif opt == "--q1": self._q1 = adjust_param(val) elif opt == "--q2": self._q2 = adjust_param(val) elif opt in ("-g", "--coverage-files"): if ',' not in val: raise WedringError(134, "You must provide a list of coverage files.") else: self._cf = [cf for cf in val.split(',') if cf != ''] elif opt == "--map-label": self._ml = adjust_param(val) elif opt in ("-p", "--path"): bin_path = val except GetoptError as err: raise WedringError(136, "%s." % str(err).capitalize()) if bin_path is not None: environ["PATH"] = path.pathsep.join((environ["PATH"], path)) if self._wb != JUST_INDEX: self._ld = path.join(self._o, "log") if self._a: if not self._qt: wedr_report("Validating GFF file: \'%s\'." % self._a) try: gff_out = write_validated_gff(self._a) except GffFormatError as gffferr: raise WedringError(134, "[%s] %s" % (type(gffferr).__name__, gffferr)) except EnvironmentError as env_err: raise WedringError(env_err.errno, "[%s (%d)] %s%s%s." % (type(env_err).__name__, env_err.errno, env_err.strerror, ': ' if env_err.filename else '', env_err.filename if env_err.filename else '')) if isinstance(gff_out, str): if not self._qt: wedr_report("Now using validated GFF file: \'%s\'." % gff_out) self._a = gff_out if not self._qt: wedr_report("Gff validation - Done!") else: if not self._qt: wedr_report("Gff validation - Done!") if self._wb in (TTL_PIPELINE, JUST_TABLE, JUST_DE): if self._cnd is None: raise WedringError(135, "You must set the experimental conditions.") if self._wb in (TTL_PIPELINE, JUST_MAP): if self._l != [None] and self._q == [None]: self._q += [None] * (len(self._l) - 1) if self._l != [None] and self._ml == [None]: self._ml += [None] * (len(self._l) - 1) if (self._1 != [None] and self._2 != [None] and self._q1 == [None] and self._q1 == [None]): self._q1 += [None] * (len(self._1) - 1) self._q2 += [None] * (len(self._2) - 1) if self._1 != [None] and self._2 != [None] and self._ml == [None]: self._ml += [None] * (len(self._1) - 1) elif self._wb == JUST_DE and self._cnt_table is None: raise WedringError(135, "You must provide the counting table.") def wedring_indexbuilder_parse_params(self): """This method sets **Wedring**'s index builder specific parameters.""" if self._i is None: self._ib = True self._indexbldr = IndexBuilder(out_dir=self._id, ref_seq=self._r, index_label=self._il, cfg_file=self._c, quiet=self._qt) def wedring_mapping_parse_params(self): """This method sets **Wedring**'s mapping stage specific parameters. :raises: :class:WedringError """ wedr_append = self._wedr_list.append if self._cnd is None: _next = count(1).next else: _next = prepare_table_header(self._cnd).next if self._wb != JUST_TABLE: if self._wb != JUST_INDEX: if self._l != [None]: if self._q != [None]: for idx, lib in enumerate(self._l): try: wedr = WedringMast(mapper=self._m, out_dir=self._o, index=self._i, lib_file=lib, qual_file=self._q[idx], annot_file=self._a, cfg_file=self._c, map_label=self._ml[idx], barrier=self._wb, quiet=self._qt, id_=_next()) except IndexError: raise WedringError(140, "Unbalanced options -l/--lib-file, --quals, --map-label or -o/--out-dir.") wedr_append(wedr) else: for idx, lib in enumerate(self._l): try: wedr = WedringMast(mapper=self._m, out_dir=self._o, index=self._i, lib_file=lib, annot_file=self._a, cfg_file=self._c, map_label=self._ml[idx], barrier=self._wb, quiet=self._qt, id_=_next()) except IndexError: raise WedringError(140, "Unbalanced options -l/--lib-file, --map-label or -o/--out-dir.") wedr_append(wedr) elif self._1 != [None]: if self._q1 != [None]: for idx, lib_1 in enumerate(self._1): try: wedr = WedringMast(mapper=self._m, out_dir=self._o, index=self._i, lib_mate_1=lib_1, lib_mate_2=self._2[idx], q_mate_1=self._q1[idx], q_mate_2=self._q2[idx], annot_file=self._a, cfg_file=self._c, map_label=self._ml[idx], barrier=self._wb, quiet=self._qt, id_=_next()) except IndexError: raise WedringError(140, "Unbalanced options -1/--pair-mate-1, -2/--pair-mate-2, --q1, --q2, --map-label or -o/--out-dir.") wedr_append(wedr) else: for idx, lib_1 in enumerate(self._1): try: wedr = WedringMast(mapper=self._m, out_dir=self._o, index=self._i, lib_mate_1=lib_1, lib_mate_2=self._2[idx], annot_file=self._a, cfg_file=self._c, map_label=self._ml[idx], barrier=self._wb, quiet=self._qt, id_=_next()) except IndexError: raise WedringError(140, "Unbalanced options -1/--pair-mate-1, -2/--pair-mate-2, --map-label or -o/--out-dir.") wedr_append(wedr) else: for cf in self._cf: wedr_append(WedringMast(annot_file=self._a, cov_file=cf, barrier=self._wb, id_=_next())) def exec_deseq(self): """Execute the **Wedring**'s differential expression stage. :raises: :class:WedringError """ if not self._qt: wedr_report("Calculating differential expression with DESeq.") outfile = path.join(self._o, "diffexpr.txt") errfile = path.join(self._ld, "diffexpr.log") de_cmd_line = "Rscript --vanilla %s %s %s %s %s" % (wedr_which("diffExprStage.R"), self.wedring_diffexpr_parse_options(), self._cnt_table, self._cnd, outfile) de = BioSoft(de_cmd_line, errfile=errfile) if not self._qt: wedr_report("Command line:\n %s" % de.command) de.run() if 0 != de.return_code != -1: raise WedringError(141, "%s exitted with status %d. See log file '%s' for more details." % (de.program_name, de.return_code, de.errfile)) wedr_clean(de.errfile) # TODO Add verification of the DESeq's output with wedr_check_path() # \_ table (OK), graphics wedr_check_path(outfile) if not self._qt: wedr_report("DESeq - Done!") def wedring_diffexpr_parse_options(self): """Parser for the DE section of the configuration file. :returns: Additional options for the **Wedring**'s Differential Expression stage. """ de_cmd = "" params = {"method": "pooled", "sharing_mode": "maximum", "fit_type": "parametric", "ma_plot": "false", "volcano_plot": "false", "dispest_plot": "false", "alpha": "0.05", "img_size": "medium"} cf_parser = RawConfigParser() cf_parser.read(self._c) for param, val in cf_parser.items("DE"): params[param] = val for param, val in params.iteritems(): if param == "method" and val != "pooled": de_cmd += " -method=%s" % val elif param == "sharing_mode" and val != "maximum": de_cmd += " -sharing.mode=%s" % val elif param == "fit_type" and val != "parametric": de_cmd += " -fit.type=%s" % val elif param == "ma_plot" and val != "false": de_cmd += " -ma.plot" elif param == "volcano_plot" and val != "false": de_cmd += " -volcano.plot" elif param == "dispest_plot" and val != "false": de_cmd += " -dispest.plot" elif param == "p_value" and val != "0.05": de_cmd += " -p.value=%s" % val elif param == "img_size" and val != "medium": de_cmd += " -img.size=%s" % val return de_cmd def run(self): """Execute all steps of the **Wedring** pipeline.""" # This method will execute according to the self._wb value: # The value are set after the command line options --just-indexbuild, # --just-map, --just-counttable, --just-de, and the possible values are # defined in the globals TTL_PIPELINE, JUST_INDEX, JUST_MAP, JUST_TABLE # and JUST_DE, which mean: # TTL_PIPELINE -- execute all steps of the pipeline # JUST_INDEX -- just execute the indexing stage # JUST_MAP -- execute the indexing stage (if needed) and the mapping # stage # JUST_TABLE -- just build the count table # JUST_DE -- just execute the differential expression stage if self._wb in (TTL_PIPELINE, JUST_INDEX, JUST_MAP): self.wedring_indexbuilder_parse_params() if self._indexbldr is not None: self._indexbldr.run() self._i = self._indexbldr.index if self._wb != JUST_INDEX: wedr_prepare_directory(self._o) wedr_prepare_directory(self._ld) self.wedring_mapping_parse_params() p = Pool(self._num_threads) self._wedr_list = p.map(WedringMast.exec_mapping_stage, self._wedr_list) if self._wb != JUST_MAP: feats_file = path.join(self._o, "genomic_features.txt") tbl_file = path.join(self._o, "count_table.txt") if not self._qt: wedr_report("Writing genomic features to disk.") write_genomic_features_to_file(self._a, feats_file) if wedr_check_path(feats_file): if not self._qt: wedr_report("Writing genomic features - Done!") cov_f = [wedrmast.cov_file for wedrmast in self._wedr_list] if not self._qt: wedr_report("Writing count table to disk.") write_count_table_to_file(self._a, cov_f, self._cnd, tbl_file) if wedr_check_path(tbl_file): self._cnt_table = tbl_file if not self._qt: wedr_report("Writing count table - Done!") self.exec_deseq() elif self._wb == JUST_TABLE: self.wedring_mapping_parse_params() wedr_prepare_directory(self._o) feats_file = path.join(self._o, "genomic_features.txt") tbl_file = path.join(self._o, "count_table.txt") if not self._qt: wedr_report("Writing genomic features to disk.") write_genomic_features_to_file(self._a, feats_file) if wedr_check_path(feats_file): if not self._qt: wedr_report("Writing genomic features - Done!") cov_f = [wedrmast.cov_file for wedrmast in self._wedr_list] if not self._qt: wedr_report("Writing count table to disk.") write_count_table_to_file(self._a, cov_f, self._cnd, tbl_file) if wedr_check_path(tbl_file): self._cnt_table = tbl_file if not self._qt: wedr_report("Writing count table - Done!") elif self._wb == JUST_DE: wedr_prepare_directory(self._o) wedr_prepare_directory(self._ld) self.exec_deseq()