def main(args=None):
    """Entry point of the standalone Kraken taxonomy application.

    The function parses the command line, optionally updates the local
    taxonomy file or downloads a database (and exits), then runs Kraken on
    the input FastQ file(s) against one or several databases and builds the
    HTML summary.

    :param list args: command-line arguments, ``args[0]`` being the program
        name. Defaults to a copy of :data:`sys.argv`.
    :raises ValueError: if a requested database is found neither in the
        Sequana configuration path nor as a local path.

    The function calls :func:`sys.exit` after ``--update-taxonomy`` or
    ``--download`` have been handled, and with status 1 when no database
    is provided.
    """
    if args is None:
        args = sys.argv[:]

    user_options = Options(prog="sequana")

    # If --help or no options provided, show the help
    if len(args) == 1:
        # NOTE(review): presumably Options behaves like an argparse parser
        # and exits after printing the help; otherwise ``options`` would be
        # unbound below -- confirm.
        user_options.parse_args(["prog", "--help"])
    else:
        options = user_options.parse_args(args[1:])

    logger.level = options.level

    if options.update_taxonomy is True:
        from sequana.taxonomy import Taxonomy
        from sequana import sequana_config_path as cfg

        tax = Taxonomy()
        logger.info(
            "Will overwrite the local database taxonomy.dat in {}".format(cfg))
        tax.download_taxonomic_file(overwrite=True)
        sys.exit(0)

    # We put the import here to make the --help faster
    from sequana import KrakenPipeline
    from sequana.kraken import KrakenSequential

    devtools = DevTools()

    if options.download:
        from sequana import KrakenDownload

        kd = KrakenDownload()
        kd.download(options.download)
        sys.exit()

    # Collect the input files that were provided (one or two FastQ files)
    fastq = []
    if options.file1:
        devtools.check_exists(options.file1)
        fastq.append(options.file1)
    if options.file2:
        devtools.check_exists(options.file2)
        fastq.append(options.file2)

    from sequana import sequana_config_path as scfg

    if options.databases is None:
        logger.critical("You must provide a database")
        sys.exit(1)

    databases = []
    for database in options.databases:
        # Short aliases for the databases known by name
        if database == "toydb":
            database = "kraken_toydb"
        elif database == "minikraken":
            database = "minikraken_20141208"

        if os.path.exists(scfg + os.sep + database):  # in Sequana path
            databases.append(scfg + os.sep + database)
        elif os.path.exists(database):  # local database
            databases.append(database)
        else:
            msg = "Invalid database name (%s). Neither found locally "
            msg += "or in the sequana path %s; Use the --download option"
            raise ValueError(msg % (database, scfg))

    output_directory = options.directory + os.sep + "kraken"
    devtools.mkdirs(output_directory)

    def _pathto(filename):
        # Prefix a user-provided filename with the kraken output directory;
        # empty/None values are passed through untouched.
        if filename:
            return "{}/kraken/{}".format(options.directory, filename)
        return filename

    # if there is only one database, use the pipeline else KrakenSequential
    if len(databases) == 1:
        logger.info("Using 1 database")
        k = KrakenPipeline(fastq, databases[0], threads=options.thread,
                           output_directory=output_directory,
                           confidence=options.confidence)

        k.run(output_filename_classified=_pathto(options.classified_out),
              output_filename_unclassified=_pathto(options.unclassified_out))
    else:
        logger.info("Using %s databases" % len(databases))
        k = KrakenSequential(fastq, databases, threads=options.thread,
                             output_directory=output_directory + os.sep,
                             force=True,
                             keep_temp_files=options.keep_temp_files,
                             output_filename_unclassified=_pathto(
                                 options.unclassified_out),
                             confidence=options.confidence)
        k.run(output_prefix="kraken")

    # This statements sets the directory where HTML will be saved
    from sequana.utils import config
    config.output_dir = options.directory

    # output_directory first argument: the directory where to find the data
    # output_filename is relative to the config.output_dir defined above
    KrakenModule(output_directory, output_filename="summary.html")

    logger.info("Open ./%s/summary.html" % options.directory)
    logger.info("or ./%s/kraken/kraken.html" % options.directory)

    if options.html is True:
        # The previous code called ``ss.onweb()`` although no ``ss`` object
        # is created in this function (NameError). Open the summary page
        # announced in the log message above instead.
        import webbrowser

        summary = os.path.join(options.directory, "summary.html")
        webbrowser.open("file://" + os.path.abspath(summary))
def main(args=None):
    """Entry point of the standalone Kraken taxonomy application.

    The function parses the command line, optionally downloads a database
    (and exits), then runs Kraken on the input FastQ file(s) against one or
    several databases and builds the HTML summary.

    :param list args: command-line arguments, ``args[0]`` being the program
        name. Defaults to a copy of :data:`sys.argv`.
    :raises ValueError: if a requested database is found neither in the
        Sequana configuration path nor as a local path.

    The function calls :func:`sys.exit` after ``--download`` has been
    handled, and with status 1 when no database is provided.
    """
    # NOTE(review): this function uses both ``logger`` and ``_log``; confirm
    # that both names are defined at module level.
    if args is None:
        args = sys.argv[:]

    user_options = Options(prog="sequana")

    # If --help or no options provided, show the help
    if len(args) == 1:
        # NOTE(review): presumably Options behaves like an argparse parser
        # and exits after printing the help; otherwise ``options`` would be
        # unbound below -- confirm.
        user_options.parse_args(["prog", "--help"])
    else:
        options = user_options.parse_args(args[1:])

    logger.level = options.level

    # We put the import here to make the --help faster
    from sequana import KrakenPipeline
    from sequana.kraken import KrakenHierarchical

    devtools = DevTools()

    if options.download:
        from sequana import KrakenDownload

        kd = KrakenDownload()
        kd.download(options.download)
        sys.exit()

    # Collect the input files that were provided (one or two FastQ files)
    fastq = []
    if options.file1:
        devtools.check_exists(options.file1)
        fastq.append(options.file1)
    if options.file2:
        devtools.check_exists(options.file2)
        fastq.append(options.file2)

    from sequana import sequana_config_path as scfg

    if options.databases is None:
        _log.critical("You must provide a database")
        sys.exit(1)

    databases = []
    for database in options.databases:
        # Short aliases for the databases known by name
        if database == "toydb":
            database = "kraken_toydb"
        elif database == "minikraken":
            database = "minikraken_20141208"

        if os.path.exists(scfg + os.sep + database):  # in Sequana path
            databases.append(scfg + os.sep + database)
        elif os.path.exists(database):  # local database
            databases.append(database)
        else:
            msg = "Invalid database name (%s). Neither found locally "
            msg += "or in the sequana path %s; Use the --download option"
            raise ValueError(msg % (database, scfg))

    output_directory = options.directory + os.sep + "kraken"
    devtools.mkdirs(output_directory)

    # if there is only one database, use the pipeline else KrakenHierarchical
    if len(databases) == 1:
        _log.info("Using 1 database")
        k = KrakenPipeline(fastq, databases[0], threads=options.thread,
                           output_directory=output_directory)

        def _pathto(filename):
            # Prefix a user-provided filename with the kraken output
            # directory; empty/None values are passed through untouched.
            if filename:
                return "{}/kraken/{}".format(options.directory, filename)
            return filename

        k.run(output_filename_classified=_pathto(options.classified_out),
              output_filename_unclassified=_pathto(options.unclassified_out))
    else:
        _log.info("Using %s databases" % len(databases))
        k = KrakenHierarchical(fastq, databases, threads=options.thread,
                               output_directory=output_directory + os.sep,
                               force=True,
                               keep_temp_files=options.keep_temp_files)
        k.run(output_prefix="kraken")

    # This statements sets the directory where HTML will be saved
    from sequana.utils import config
    config.output_dir = options.directory

    # output_directory first argument: the directory where to find the data
    # output_filename is relative to the config.output_dir defined above
    KrakenModule(output_directory, output_filename="summary.html")

    _log.info("Open ./%s/summary.html" % options.directory)
    _log.info("or ./%s/kraken/kraken.html" % options.directory)

    if options.html is True:
        # The previous code called ``ss.onweb()`` although no ``ss`` object
        # is created in this function (NameError). Open the summary page
        # announced in the log message above instead.
        import webbrowser

        summary = os.path.join(options.directory, "summary.html")
        webbrowser.open("file://" + os.path.abspath(summary))
class KrakenAnalysis(object):
    """Run kraken on a set of FastQ files

    In order to run a Kraken analysis, we first need a local database.
    We provide a Toy example. The ToyDB is downloadable as follows ( you will
    need to run the following code only once)::

        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download_kraken_toydb()

    .. seealso:: :class:`KrakenDownload` for more database and
        :class:`sequana.kraken_builder.KrakenBuilder` to build your own
        databases

    The path to the database is required to run the analysis. It has been
    stored in the directory ./config/sequana/kraken_toydb under Linux
    platforms. The following code should be platform independent::

        import os
        from sequana import sequana_config_path
        database = sequana_config_path + os.sep + "kraken_toydb"

    Finally, we can run the analysis on the toy data set::

        from sequana import sequana_data
        data = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz", "data")
        ka = KrakenAnalysis(data, database=database)
        ka.run()

    The raw Kraken output is written to the file whose name is stored in
    :attr:`kraken_output` (a temporary file unless *output_filename* is given
    to :meth:`run`). It can be interpreted with :class:`KrakenResults`

    """
    def __init__(self, fastq, database, threads=4):
        """.. rubric:: Constructor

        :param fastq: either a fastq filename or a list of 2 fastq filenames
        :param database: the path to a valid Kraken database
        :param threads: number of threads to be used by Kraken
        :raises ValueError: if *fastq* is neither a string nor a list

        """
        self._devtools = DevTools()
        self._devtools.check_exists(database)

        self.database = database
        self.threads = threads

        # Fastq input
        if isinstance(fastq, str):
            self.paired = False
            self.fastq = [fastq]
        elif isinstance(fastq, list):
            # Only a list of exactly two files is handled as paired-end data
            self.paired = len(fastq) == 2
            self.fastq = fastq
        else:
            raise ValueError(
                "Expected a fastq filename or list of 2 fastq filenames")

        # Check each input file (the previous code re-checked the database
        # here, so missing FastQ files were never detected).
        for this in self.fastq:
            self._devtools.check_exists(this)

    def run(self, output_filename=None, output_filename_classified=None,
            output_filename_unclassified=None, only_classified_output=False):
        """Performs the kraken analysis

        :param str output_filename: if not provided, a temporary file is used
            and stored in :attr:`kraken_output`.
        :param str output_filename_classified: not compressed
        :param str output_filename_unclassified: not compressed
        :param bool only_classified_output: if True, the
            ``--only-classified-output`` option is added to the command.

        """
        if output_filename is None:
            self.kraken_output = TempFile().name
        else:
            self.kraken_output = output_filename

        params = {
            "database": self.database,
            "thread": self.threads,
            "file1": self.fastq[0],
            "kraken_output": self.kraken_output,
            "output_filename_unclassified": output_filename_unclassified,
            "output_filename_classified": output_filename_classified,
        }

        if self.paired:
            params["file2"] = self.fastq[1]

        # Build the command template; placeholders are filled from params
        command = "kraken -db %(database)s %(file1)s "

        if self.paired:
            command += " %(file2)s --paired"
        command += " --threads %(thread)s --out %(kraken_output)s"

        if output_filename_unclassified:
            command += " --unclassified-out %(output_filename_unclassified)s "

        if only_classified_output is True:
            command += " --only-classified-output"

        if output_filename_classified:
            command += " --classified-out %(output_filename_classified)s "

        command = command % params
        # Somehow there is an error using easydev.execute with pigz
        from snakemake import shell
        shell(command)
class KrakenAnalysis(object):
    """Run kraken on a set of FastQ files

    In order to run a Kraken analysis, we first need a local database.
    We provide a Toy example. The ToyDB is downloadable as follows ( you will
    need to run the following code only once)::

        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download_kraken_toydb()

    .. seealso:: :class:`KrakenDownload` for more database and
        :class:`sequana.kraken_builder.KrakenBuilder` to build your own
        databases

    The path to the database is required to run the analysis. It has been
    stored in the directory ./config/sequana/kraken_toydb under Linux
    platforms. The following code should be platform independent::

        import os
        from sequana import sequana_config_path
        database = sequana_config_path + os.sep + "kraken_toydb"

    Finally, we can run the analysis on the toy data set::

        from sequana import sequana_data
        data = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz", "data")
        ka = KrakenAnalysis(data, database=database)
        ka.run()

    The raw Kraken output is written to the file whose name is stored in
    :attr:`kraken_output` (a temporary file unless *output_filename* is given
    to :meth:`run`). It can be interpreted with :class:`KrakenResults`

    """
    def __init__(self, fastq, database, threads=4):
        """.. rubric:: Constructor

        :param fastq: either a fastq filename or a list of 2 fastq filenames
        :param database: the path to a valid Kraken database
        :param threads: number of threads to be used by Kraken
        :raises ValueError: if *fastq* is neither a string nor a list

        """
        self._devtools = DevTools()
        self._devtools.check_exists(database)

        self.database = database
        self.threads = threads

        # Fastq input
        if isinstance(fastq, str):
            self.paired = False
            self.fastq = [fastq]
        elif isinstance(fastq, list):
            # Only a list of exactly two files is handled as paired-end data
            self.paired = len(fastq) == 2
            self.fastq = fastq
        else:
            raise ValueError(
                "Expected a fastq filename or list of 2 fastq filenames")

        # Check each input file (the previous code re-checked the database
        # here, so missing FastQ files were never detected).
        for this in self.fastq:
            self._devtools.check_exists(this)

    def run(self, output_filename=None, output_filename_classified=None,
            output_filename_unclassified=None, only_classified_output=False):
        """Performs the kraken analysis

        :param str output_filename: if not provided, a temporary file is used
            and stored in :attr:`kraken_output`.
        :param str output_filename_classified: not compressed
        :param str output_filename_unclassified: not compressed
        :param bool only_classified_output: if True, the
            ``--only-classified-output`` option is added to the command.

        """
        if output_filename is None:
            self.kraken_output = TempFile().name
        else:
            self.kraken_output = output_filename

        params = {
            "database": self.database,
            "thread": self.threads,
            "file1": self.fastq[0],
            "kraken_output": self.kraken_output,
            "output_filename_unclassified": output_filename_unclassified,
            "output_filename_classified": output_filename_classified,
        }

        if self.paired:
            params["file2"] = self.fastq[1]

        # Build the command template; placeholders are filled from params
        command = "kraken -db %(database)s %(file1)s "

        if self.paired:
            command += " %(file2)s --paired"
        command += " --threads %(thread)s --output %(kraken_output)s "
        command += " --out-fmt legacy"

        if output_filename_unclassified:
            command += " --unclassified-out %(output_filename_unclassified)s "

        if only_classified_output is True:
            command += " --only-classified-output"

        if output_filename_classified:
            command += " --classified-out %(output_filename_classified)s "

        command = command % params
        # Somehow there is an error using easydev.execute with pigz
        from snakemake import shell
        shell(command)