def __init__(self, design_filename, adapters):
    """.. rubric:: Constructor

    :param str design_filename: a CSV file that is compatible
        with our :class:`sequana.expdesign.ExpDesignAdapter`
    :param adapters: the type of adapters (PCRFree, Nextera,
        Rubicon, TruSeq, SMARTer, Small)
    :raises ValueError: if the design has neither a Sample_ID column
        nor an index named Sample_ID

    The files of adapters are stored in Sequana and accessible with the
    sequana_data function. So, for instance if adapters is set to Nextera,
    the following file is used to identify the adapters::

        sequana_data("adapters_Nextera_fwd.fa")

    New adapters files can be added on request. Currently, Nextera and
    PCRFree are available. Rubicon and TruSeq will be added soon.

    """
    from sequana.expdesign import ExpDesignAdapter

    self.design = ExpDesignAdapter(design_filename)

    # Re-index only when Sample_ID is an actual column: calling set_index
    # on a frame whose index is already named Sample_ID (and which has no
    # such column) would raise a KeyError.
    if "Sample_ID" in self.design.df.columns:
        self.design.df.set_index("Sample_ID", inplace=True)
    elif self.design.df.index.name != "Sample_ID":
        raise ValueError("Incorrect design file. Missing Sample_ID field")

    self.adapters = adapters

    # Forward and reverse-complement adapter files for the requested type
    file1 = sequana_data("adapters_%s_fwd.fa" % adapters)
    file2 = sequana_data("adapters_%s_revcomp.fa" % adapters)

    self._adapters_fwd = AdapterReader(file1)
    self._adapters_revc = AdapterReader(file2)  # !!! revcomp
def get_sequana_adapters(type_, direction):
    """Return path to a list of adapters in FASTA format

    :param type_: the adapter type (e.g. PCRFree, Rubicon, Nextera); must
        be one of the names returned by :func:`_get_registered_adapters`
    :param direction: fwd, rev, revcomp
    :return: path to the adapter filename
    :raises ValueError: if *type_* is not a registered adapter type or
        *direction* is not one of fwd, rev, revcomp

    The file named ``adapters_<type_>_<direction>.fa`` is looked up first;
    if that lookup fails, ``<type_>_<direction>.fa`` is used instead.
    """
    # search possible types
    registered = _get_registered_adapters()
    if type_ not in registered:
        logger.error("This adapter type (%s) is not valid" % type_)
        logger.error("choose one in %s types" % registered)
        raise ValueError("This adapter type (%s) is not valid" % type_)

    directions = ["fwd", "rev", "revcomp"]
    if direction not in directions:
        logger.error("This kind of tag (%s) is not valid" % direction)
        logger.error("choose one in %s " % directions)
        raise ValueError("This kind of tag (%s) is not valid" % direction)

    # Legacy naming (with the adapters_ prefix) is tried first. Only the
    # lookup itself is guarded, and Exception is caught rather than using a
    # bare except so that KeyboardInterrupt/SystemExit still propagate.
    try:
        this = sequana_data("adapters_%s_%s.fa" % (type_, direction))
    except Exception:
        return sequana_data("%s_%s.fa" % (type_, direction))

    logger.warning("Rename {} (remove the adapters_ prefix)".format(this))
    return this
def _get_registered_adapters():
    """Return the set of adapter type names found in data/adapters

    Only entries containing ``_fwd`` are considered. The type name is the
    entry with a leading ``adapters_`` prefix (if any) and the ``_fwd.fa``
    part removed.

    :return: set of adapter type names
    """
    prefix = "adapters_"
    registered = set()
    # presumably sequana_data('*', ...) yields the file names stored in
    # data/adapters -- confirm against sequana_data
    for filename in sequana_data('*', 'data/adapters'):
        if "_fwd" not in filename:
            continue
        # Remove the exact prefix. str.lstrip("adapters_") strips any of
        # those *characters*, which mangles names such as "small".
        if filename.startswith(prefix):
            filename = filename[len(prefix):]
        registered.add(filename.replace("_fwd.fa", ""))
    return registered
def __init__(self, design_filename, adapters):
    """.. rubric:: Constructor

    :param str design_filename: a CSV file that is compatible
        with our :class:`sequana.expdesign.ExpDesignAdapter`
    :param adapters: the type of adapters (PCRFree, Nextera,
        Rubicon, TruSeq, SMARTer, Small)
    :raises ValueError: if the design has neither a Sample_ID column
        nor an index named Sample_ID

    The files of adapters are stored in Sequana and accessible with the
    sequana_data function. So, for instance if adapters is set to Nextera,
    the following file is used to identify the adapters::

        sequana_data("adapters_Nextera_fwd.fa")

    If the file with the ``adapters_`` prefix cannot be retrieved, the same
    name without the prefix is used instead.

    New adapters files can be added on request. See resources/data/adapters
    for the full list. You can also use::

        from sequana.adapters import _get_registered_adapters
        _get_registered_adapters()

    """
    from sequana.expdesign import ExpDesignAdapter

    self.design = ExpDesignAdapter(design_filename)

    # Re-index only when Sample_ID is an actual column: calling set_index
    # on a frame whose index is already named Sample_ID (and which has no
    # such column) would raise a KeyError.
    if "Sample_ID" in self.design.df.columns:
        self.design.df.set_index("Sample_ID", inplace=True)
    elif self.design.df.index.name != "Sample_ID":
        raise ValueError("Incorrect design file. Missing Sample_ID field")

    self.adapters = adapters

    # Forward adapters: prefixed name first, un-prefixed name as fallback.
    # Exception (not a bare except) so KeyboardInterrupt still propagates.
    try:
        file1 = sequana_data("adapters_%s_fwd.fa" % adapters)
    except Exception:
        file1 = sequana_data("%s_fwd.fa" % adapters)
    else:
        logger.warning("rename your file removing prefix adatper")

    # Reverse-complement adapters: same lookup strategy
    try:
        file2 = sequana_data("adapters_%s_revcomp.fa" % adapters)
    except Exception:
        file2 = sequana_data("%s_revcomp.fa" % adapters)
    else:
        logger.warning("rename your file removing prefix adatper")

    self._adapters_fwd = AdapterReader(file1)
    self._adapters_revc = AdapterReader(file2)  # !!! revcomp
def __init__(self, design_filename, adapters):
    """.. rubric:: Constructor

    :param str design_filename: a CSV file that is compatible
        with our :class:`sequana.iem.IEM`
    :param adapters: the type of adapters (PCRFree, Nextera,
        Rubicon, TruSeq, SMARTer, Small)

    The files of adapters are stored in Sequana and accessible with the
    sequana_data function. So, for instance if adapters is set to Nextera,
    the following file is used to identify the adapters::

        sequana_data("adapters_Nextera_fwd.fa")

    If the file with the ``adapters_`` prefix cannot be retrieved, the same
    name without the prefix is used instead.

    New adapters files can be added on request. See resources/data/adapters
    for the full list. You can also use::

        from sequana.adapters import _get_registered_adapters
        _get_registered_adapters()

    """
    from sequana.iem import IEM

    self.design = IEM(design_filename)

    self.adapters = adapters

    # Forward adapters: prefixed name first, un-prefixed name as fallback.
    # Exception (not a bare except) so KeyboardInterrupt still propagates.
    try:
        file1 = sequana_data("adapters_%s_fwd.fa" % adapters)
    except Exception:
        file1 = sequana_data("%s_fwd.fa" % adapters)
    else:
        logger.warning("rename your file removing prefix adatper")

    # Reverse-complement adapters: same lookup strategy
    try:
        file2 = sequana_data("adapters_%s_revcomp.fa" % adapters)
    except Exception:
        file2 = sequana_data("%s_revcomp.fa" % adapters)
    else:
        logger.warning("rename your file removing prefix adatper")

    self._adapters_fwd = AdapterReader(file1)
    self._adapters_revc = AdapterReader(file2)  # !!! revcomp
def _get_registered_adapters():
    """Return the set of adapter type names found in data/adapters

    Only entries whose name starts with ``adapters`` are considered. The
    type name is what remains after removing a leading ``adapters_`` prefix
    (if any), up to the next underscore.

    :return: set of adapter type names
    """
    prefix = "adapters_"
    registered = set()
    # presumably sequana_data('*', ...) yields the file names stored in
    # data/adapters -- confirm against sequana_data
    for filename in sequana_data('*', 'data/adapters'):
        if not filename.startswith("adapters"):
            continue
        # Remove the exact prefix. str.lstrip("adapters_") strips any of
        # those *characters*, which mangles names such as "small".
        if filename.startswith(prefix):
            filename = filename[len(prefix):]
        registered.add(filename.split("_", 1)[0])
    return registered