Exemplo n.º 1
0
    def setUp(self):
        """Build the fixtures shared by the tests in this case.

        Looks up the resfinder/pointfinder/plasmidfinder repository
        directories of the default database manager, creates the BLAST
        database wrappers and drug tables, and wires them into a
        ``JobHandler`` and an ``AMRDetectionResistance`` instance that write
        into fresh temporary directories.
        """
        repos = AMRDatabasesManager.create_default_manager().get_database_repos()
        self.resfinder_dir = repos.get_repo_dir('resfinder')
        self.pointfinder_dir = repos.get_repo_dir('pointfinder')
        self.plasmidfinder_dir = repos.get_repo_dir('plasmidfinder')

        self.resfinder_database = ResfinderBlastDatabase(self.resfinder_dir)
        self.resfinder_drug_table = ARGDrugTableResfinder()
        self.pointfinder_drug_table = ARGDrugTablePointfinder()
        self.plasmidfinder_database = PlasmidfinderBlastDatabase(
            self.plasmidfinder_dir)
        # No PointFinder database object is used by this fixture.
        self.pointfinder_database = None

        # BLAST results go into a throw-away directory.
        self.blast_out = tempfile.TemporaryDirectory()
        databases_by_name = {
            'resfinder': self.resfinder_database,
            'pointfinder': self.pointfinder_database,
            'plasmidfinder': self.plasmidfinder_database
        }
        # NOTE(review): the positional 2 is presumably a thread/process count
        # -- confirm against JobHandler's signature.
        self.blast_handler = JobHandler(databases_by_name, 2,
                                        self.blast_out.name)

        self.outdir = tempfile.TemporaryDirectory()
        self.amr_detection = AMRDetectionResistance(
            self.resfinder_database,
            self.resfinder_drug_table,
            self.blast_handler,
            self.pointfinder_drug_table,
            self.pointfinder_database,
            output_dir=self.outdir.name)

        # Test data lives in the 'data' directory one level above this file.
        self.test_data_dir = path.join(path.dirname(__file__), '..', 'data')
Exemplo n.º 2
0
    def run(self, args):
        """Create ``args.destination`` and build the databases inside it.

        :param args: Parsed arguments; ``destination``, ``resfinder_commit``
            and ``pointfinder_commit`` are read.
        :raises CommandParseException: If the destination already exists
            (help is printed as well when it is the default directory).
        """
        super(Build, self).run(args)

        destination = args.destination

        # Refuse to build on top of an existing directory.
        if path.exists(destination):
            if destination == self._default_dir:
                raise CommandParseException(
                    "Error, default destination [" + destination +
                    "] already exists",
                    self._root_arg_parser,
                    print_help=True)
            raise CommandParseException(
                "Error, destination [" + destination + "] already exists",
                self._root_arg_parser)
        mkdir(destination)

        # The default directory is handled by the default manager; any other
        # location gets a manager rooted at that location.
        if destination == AMRDatabasesManager.get_default_database_directory():
            manager = AMRDatabasesManager.create_default_manager()
        else:
            manager = AMRDatabasesManager(destination)
        database_repos = manager.get_database_repos()

        requested_commits = {
            'resfinder': args.resfinder_commit,
            'pointfinder': args.pointfinder_commit
        }
        database_repos.build(requested_commits)

        if AMRDatabasesManager.is_database_repos_default_commits(
                database_repos):
            return

        logger.warning(
            "Built non-default ResFinder/PointFinder database version. This may lead to "
            +
            "differences in the detected AMR genes depending on how the database files are structured."
        )
Exemplo n.º 3
0
    def run(self, args):
        """Update the databases in the given directories, or the default one.

        :param args: Parsed arguments; ``directories``, ``update_default`` and
            the three ``*_commit`` values are read.
        :raises CommandParseException: If no directory is given and
            ``update_default`` is not set.
        :raises DatabaseErrorException: Re-raised (after logging) when the
            default database cannot be updated.
        """
        super(Update, self).run(args)

        def requested_commits():
            # Fresh mapping of repository name -> requested commit per call.
            return {'resfinder': args.resfinder_commit, 'pointfinder': args.pointfinder_commit,
                    'plasmidfinder': args.plasmidfinder_commit}

        # Explicit directories: update each of them in turn.
        if len(args.directories) != 0:
            for directory in args.directories:
                database_repos = AMRDatabasesManager(directory).get_database_repos()
                database_repos.update(requested_commits())
                if AMRDatabasesManager.is_database_repos_default_commits(database_repos):
                    continue
                logger.warning(
                    "Updated to non-default ResFinder/PointFinder/PlasmidFinder database version [%s]. This may lead to " +
                    "differences in the detected AMR genes depending on how the database files are structured.",
                    directory)
            return

        # No directories: the caller must explicitly ask for the default one.
        if not args.update_default:
            raise CommandParseException("Must pass at least one directory to update, or use '--update-default'",
                                        self._root_arg_parser,
                                        print_help=True)

        try:
            default_manager = AMRDatabasesManager.create_default_manager()
            database_repos = default_manager.get_database_repos(force_use_git=True)

            database_repos.update(requested_commits())

            if not AMRDatabasesManager.is_database_repos_default_commits(database_repos):
                logger.warning(
                    "Updated to non-default ResFinder/PointFinder/PlasmidFinder database version. This may lead to " +
                    "differences in the detected AMR genes depending on how the database files are structured.")
        except DatabaseErrorException as e:
            logger.error(
                "Could not update default database. Please try restoring with 'staramr db restore-default'")
            raise e
Exemplo n.º 4
0
    def testIsHandlerDefaultCommitsTrue(self):
        # After setup_default() the manager's handler must be reported as
        # being at the default commits.
        self.databases_manager.setup_default()
        handler = self.databases_manager.get_database_handler()

        uses_default_commits = AMRDatabasesManager.is_handler_default_commits(
            handler)
        self.assertTrue(uses_default_commits, "Database is not default")
    def testIsHandlerDefaultCommitsTrue(self):
        # After setup_default() the manager's repositories must be reported
        # as being at the default commits.
        self.databases_manager.setup_default()
        repos = self.databases_manager.get_database_repos()

        uses_default_commits = (
            AMRDatabasesManager.is_database_repos_default_commits(repos))
        self.assertTrue(uses_default_commits, "Database is not default")
    def testIsHandlerDefaultCommitsFalse(self):
        # Move resfinder to a specific commit through the git-backed
        # repositories; they must then no longer count as default.
        repos = self.databases_manager.get_database_repos(force_use_git=True)
        pinned_commits = {
            'resfinder': 'dc33e2f9ec2c420f99f77c5c33ae3faa79c999f2'
        }
        repos.update(pinned_commits)

        uses_default_commits = (
            AMRDatabasesManager.is_database_repos_default_commits(repos))
        self.assertFalse(uses_default_commits, "Database is default")
Exemplo n.º 7
0
    def testIsHandlerDefaultCommitsFalse(self):
        # Move resfinder to a specific commit through the git-backed handler;
        # it must then no longer count as default.
        handler = self.databases_manager.get_database_handler(
            force_use_git=True)
        handler.update(
            resfinder_commit='dc33e2f9ec2c420f99f77c5c33ae3faa79c999f2')

        uses_default_commits = AMRDatabasesManager.is_handler_default_commits(
            handler)
        self.assertFalse(uses_default_commits, "Database is default")
Exemplo n.º 8
0
    def run(self, args):
        """Restore the default database, asking for confirmation unless forced.

        :param args: Parsed arguments; only ``force`` is read.
        """
        super(RestoreDefault, self).run(args)

        database_manager = AMRDatabasesManager.create_default_manager()

        # With a truthy ``force`` the confirmation prompt is skipped entirely.
        if args.force or self._confirm_restore():
            database_manager.restore_default()
Exemplo n.º 9
0
    def _setup_args(self, arg_parser):
        """Register the ``info`` sub-command and return its parser.

        :param arg_parser: Not read; the name is rebound to the new
            sub-parser in the original code.
        :return: The parser created for the ``info`` sub-command.
        """
        script = self._script_name
        default_database_dir = AMRDatabasesManager.get_default_database_directory()

        # Usage examples shown verbatim at the end of the help text.
        epilog = "".join([
            "Example:\n",
            "\t" + script + " info\n",
            "\t\tPrints information about the default database in " + default_database_dir + "\n\n",
            "\t" + script + " info databases\n",
            "\t\tPrints information on the database stored in databases/",
        ])

        info_parser = self._subparser.add_parser(
            'info',
            epilog=epilog,
            formatter_class=argparse.RawTextHelpFormatter,
            help='Prints information on databases in the given directories.')
        info_parser.add_argument('directories', nargs='*')

        return info_parser
Exemplo n.º 10
0
    def _setup_args(self, arg_parser):
        """Register the ``build`` sub-command and return its parser.

        Also stores the default database directory on ``self._default_dir``.

        :param arg_parser: Not read; a new sub-parser is created instead.
        :return: The parser created for the ``build`` sub-command.
        """
        script = self._script_name
        self._default_dir = AMRDatabasesManager.get_default_database_directory()

        # Usage examples shown verbatim at the end of the help text.
        epilog = "".join([
            "Example:\n",
            "\t" + script + " build\n",
            "\t\tBuilds a new ResFinder/PointFinder database under " +
            self._default_dir + " if it does not exist\n\n",
            "\t" + script + " build --dir databases\n",
            "\t\tBuilds a new ResFinder/PointFinder database under databases/",
        ])

        build_parser = self._subparser.add_parser(
            'build',
            epilog=epilog,
            formatter_class=argparse.RawTextHelpFormatter,
            help='Downloads and builds databases in the given directory.')

        build_parser.add_argument(
            '--dir',
            action='store',
            dest='destination',
            type=str,
            help='The directory to download the databases into [' +
            self._default_dir + '].',
            default=self._default_dir,
            required=False)

        # Both commit options have the same shape, differing only in the
        # database name.
        for database in ('resfinder', 'pointfinder'):
            build_parser.add_argument(
                '--' + database + '-commit',
                action='store',
                dest=database + '_commit',
                type=str,
                help='The specific git commit for the ' + database +
                ' database [latest].',
                required=False)

        return build_parser
Exemplo n.º 11
0
    def _setup_args(self, arg_parser):
        """Register the ``update`` sub-command and return its parser.

        Also stores the default database directory on ``self._default_dir``.

        Options registered: ``-d/--update-default``, one ``--<db>-commit``
        option for each of resfinder, pointfinder and plasmidfinder, and the
        positional ``directories`` list.

        :param arg_parser: Not read; a new sub-parser is created instead.
        :return: The parser created for the ``update`` sub-command.
        """
        self._default_dir = AMRDatabasesManager.get_default_database_directory(
        )
        name = self._script_name
        epilog = (
            "Example:\n"
            "\t" + name + " update databases/\n"
            "\t\tUpdates the ResFinder/PointFinder database under databases/\n\n"
            + "\t" + name + " update -d\n" +
            "\t\tUpdates the default ResFinder/PointFinder database under " +
            self._default_dir)
        arg_parser = self._subparser.add_parser(
            'update',
            epilog=epilog,
            formatter_class=argparse.RawTextHelpFormatter,
            help='Updates databases in the given directories.')

        arg_parser.add_argument('-d',
                                '--update-default',
                                action='store_true',
                                dest='update_default',
                                help='Updates default database directory (' +
                                self._default_dir + ').',
                                required=False)

        # The update command reads args.resfinder_commit,
        # args.pointfinder_commit and args.plasmidfinder_commit, so all three
        # options must exist on the parsed namespace (each is None when the
        # option is not given).
        for database in ('resfinder', 'pointfinder', 'plasmidfinder'):
            arg_parser.add_argument(
                '--' + database + '-commit',
                action='store',
                dest=database + '_commit',
                type=str,
                help='The specific git commit for the ' + database +
                ' database [latest].',
                required=False)

        arg_parser.add_argument('directories', nargs='*')

        return arg_parser
Exemplo n.º 12
0
    def run(self, args):
        """Print database information for the given directories.

        When no directories are given, the default database is used. A
        missing database is logged as an error instead of being raised.

        :param args: Parsed arguments; only ``directories`` is read.
        """
        super(Info, self).run(args)

        arg_drug_table = ARGDrugTable()

        def write_info(repos):
            # Database info merged with the resistance table info, written
            # to standard output.
            info = repos.info()
            info.update(arg_drug_table.get_resistance_table_info())
            sys.stdout.write(get_string_with_spacing(info))

        # Explicit directories: report on each one independently.
        if len(args.directories) != 0:
            for directory in args.directories:
                try:
                    database_repos = AMRDatabasesManager(
                        directory).get_database_repos()
                    if not AMRDatabasesManager.is_database_repos_default_commits(
                            database_repos):
                        logger.warning(
                            "Using non-default ResFinder/PointFinder database version [%s]. This may lead to "
                            +
                            "differences in the detected AMR genes depending on how the database files are structured.",
                            directory)

                    write_info(database_repos)
                except DatabaseNotFoundException:
                    logger.error(
                        "Database not found in [%s]. Perhaps try building with 'staramr db build --dir %s'",
                        directory, directory)
            return

        # No directories: fall back to the default database.
        default_manager = AMRDatabasesManager.create_default_manager()
        database_repos = default_manager.get_database_repos()
        if not AMRDatabasesManager.is_database_repos_default_commits(
                database_repos):
            logger.warning(
                "Using non-default ResFinder/PointFinder database versions. This may lead to differences in the detected "
                +
                "AMR genes depending on how the database files are structured."
            )

        try:
            write_info(database_repos)
        except DatabaseNotFoundException:
            logger.error(
                "No database found. Perhaps try restoring the default with 'staramr db restore-default'"
            )
Exemplo n.º 13
0
class AMRDatabasesManagerIT(unittest.TestCase):
    RESFINDER_DEFAULT_COMMIT = 'e8f1eb2585cd9610c4034a54ce7fc4f93aa95535'
    POINTFINDER_DEFAULT_COMMIT = '8706a6363bb29e47e0e398c53043b037c24b99a7'

    def setUp(self):
        self.databases_dir = tempfile.TemporaryDirectory()
        self.databases_manager = AMRDatabasesManager(
            database_dir=self.databases_dir.name, sub_dirs=True)

    def tearDown(self):
        self.databases_dir.cleanup()

    def testGetHandlerGitStripDir(self):
        self.assertIsInstance(self.databases_manager.get_database_handler(),
                              AMRDatabaseHandlerStripGitDir,
                              'Invalid instance returned')

    def testGetHandlerGit(self):
        self.assertIsInstance(
            self.databases_manager.get_database_handler(force_use_git=True),
            AMRDatabaseHandler, 'Invalid instance returned')

    def testSetupDefault(self):
        database_handler = self.databases_manager.get_database_handler()

        # Verify that databases don't exist beforehand
        self.assertFalse(path.exists(database_handler.get_resfinder_dir()),
                         'resfinder path exists before creation of database')
        self.assertFalse(
            path.exists(database_handler.get_pointfinder_dir()),
            'pointfinder path exists before creation of database')

        # Setup default database
        self.databases_manager.setup_default()

        # Verify that resfinder/pointfinder paths exist
        self.assertTrue(path.exists(database_handler.get_resfinder_dir()),
                        'resfinder path does not exist')
        self.assertTrue(path.exists(database_handler.get_resfinder_dir()),
                        'pointfinder path does not exist')
        self.assertTrue(
            path.exists(
                path.join(database_handler.get_database_dir(), 'info.ini')),
            'info file does not exist')

        # Verify we've removed the .git directories
        self.assertFalse(
            path.exists(path.join(database_handler.get_resfinder_dir(),
                                  '.git')),
            'resfinder .git directory was not removed')
        self.assertFalse(
            path.exists(
                path.join(database_handler.get_pointfinder_dir(), '.git')),
            'pointfinder .git directory was not removed')

        config = configparser.ConfigParser()
        config.read(path.join(database_handler.get_database_dir(), 'info.ini'))

        # Verify that the info.ini file has correct git commits for default database
        self.assertEqual(config['GitInfo']['resfinder_db_commit'],
                         self.RESFINDER_DEFAULT_COMMIT,
                         'invalid resfinder commit')
        self.assertEqual(config['GitInfo']['pointfinder_db_commit'],
                         self.POINTFINDER_DEFAULT_COMMIT,
                         'invalid pointfinder commit')

    def testRestoreDefault(self):
        # Build initial default database
        self.databases_manager.setup_default()

        # Build updated database
        database_handler_git = self.databases_manager.get_database_handler(
            force_use_git=True)
        database_handler_git.build(
            resfinder_commit=self.RESFINDER_DEFAULT_COMMIT,
            pointfinder_commit=self.POINTFINDER_DEFAULT_COMMIT)

        # Verify that updated database is the one that gets returned by get_database_handler()
        database_handler = self.databases_manager.get_database_handler()
        self.assertIsInstance(database_handler, AMRDatabaseHandler,
                              'Invalid instance returned')
        self.assertTrue(
            path.exists(path.join(database_handler.get_resfinder_dir(),
                                  '.git')),
            'Not using git version (updated version) of resfinder database')
        self.assertTrue(
            path.exists(
                path.join(database_handler.get_pointfinder_dir(), '.git')),
            'Not using git version (updated version) of pointfinder database')

        # Restore default database
        self.databases_manager.restore_default()

        # Verify that default database (git stripped version) is the one that gets returned by get_database_handler()
        database_handler = self.databases_manager.get_database_handler()
        self.assertIsInstance(database_handler, AMRDatabaseHandlerStripGitDir,
                              'Invalid instance returned')
        self.assertFalse(
            path.exists(path.join(database_handler.get_resfinder_dir(),
                                  '.git')),
            'resfinder .git directory was not removed')
        self.assertFalse(
            path.exists(
                path.join(database_handler.get_pointfinder_dir(), '.git')),
            'pointfinder .git directory was not removed')

    def testIsHandlerDefaultCommitsTrue(self):
        # Setup default database
        self.databases_manager.setup_default()

        database_handler = self.databases_manager.get_database_handler()

        self.assertTrue(
            AMRDatabasesManager.is_handler_default_commits(database_handler),
            "Database is not default")

    def testIsHandlerDefaultCommitsFalse(self):
        # Setup database
        database_handler = self.databases_manager.get_database_handler(
            force_use_git=True)
        database_handler.update(
            resfinder_commit='dc33e2f9ec2c420f99f77c5c33ae3faa79c999f2')

        self.assertFalse(
            AMRDatabasesManager.is_handler_default_commits(database_handler),
            "Database is default")
class AMRDatabasesManagerIT(unittest.TestCase):
    RESFINDER_DEFAULT_COMMIT = 'e8f1eb2585cd9610c4034a54ce7fc4f93aa95535'
    POINTFINDER_DEFAULT_COMMIT = '8706a6363bb29e47e0e398c53043b037c24b99a7'
    PLASMIDFINDER_DEFAULT_COMMIT = '81919954cbedaff39056610ab584ab4c06011ed8'

    def setUp(self):
        self.databases_dir = tempfile.TemporaryDirectory()
        self.databases_manager = AMRDatabasesManager(
            database_dir=self.databases_dir.name, sub_dirs=True)

    def tearDown(self):
        self.databases_dir.cleanup()

    def testSetupDefault(self):
        blast_database_repos = self.databases_manager.get_database_repos()

        # Verify that databases don't exist beforehand
        self.assertFalse(
            path.exists(blast_database_repos.get_repo_dir('resfinder')),
            'resfinder path exists before creation of database')
        self.assertFalse(
            path.exists(blast_database_repos.get_repo_dir('pointfinder')),
            'pointfinder path exists before creation of database')
        self.assertFalse(
            path.exists(blast_database_repos.get_repo_dir('plasmidfinder')),
            'plasmidfinder path exists before creation of database')

        # Setup default database
        self.databases_manager.setup_default()

        # Verify that resfinder/pointfinder paths exist
        self.assertTrue(
            path.exists(blast_database_repos.get_repo_dir('resfinder')),
            'resfinder path does not exist')
        self.assertTrue(
            path.exists(blast_database_repos.get_repo_dir('pointfinder')),
            'pointfinder path does not exist')
        self.assertTrue(
            path.exists(blast_database_repos.get_repo_dir('plasmidfinder')),
            'plasmidfinder path does not exist')

        self.assertTrue(
            path.exists(
                path.join(blast_database_repos.get_database_dir(),
                          'resfinder-info.ini')),
            'resfinder info file does not exist')
        self.assertTrue(
            path.exists(
                path.join(blast_database_repos.get_database_dir(),
                          'pointfinder-info.ini')),
            'pointfinder info file does not exist')
        self.assertTrue(
            path.exists(
                path.join(blast_database_repos.get_database_dir(),
                          'plasmidfinder-info.ini')),
            'plasmidfinder info file does not exist')

        # Verify we've removed the .git directories
        self.assertFalse(
            path.exists(
                path.join(blast_database_repos.get_repo_dir('resfinder'),
                          '.git')), 'resfinder .git directory was not removed')
        self.assertFalse(
            path.exists(
                path.join(blast_database_repos.get_repo_dir('pointfinder'),
                          '.git')),
            'pointfinder .git directory was not removed')
        self.assertFalse(
            path.exists(
                path.join(blast_database_repos.get_repo_dir('plasmidfinder'),
                          '.git')),
            'plasmidfinder .git directory was not removed')

        config = configparser.ConfigParser()

        # Verify that the info.ini file has correct git commits for default database
        config.read(
            path.join(blast_database_repos.get_database_dir(),
                      'resfinder-info.ini'))
        self.assertEqual(config['GitInfo']['resfinder_db_commit'],
                         self.RESFINDER_DEFAULT_COMMIT,
                         'invalid resfinder commit')

        config.read(
            path.join(blast_database_repos.get_database_dir(),
                      'pointfinder-info.ini'))
        self.assertEqual(config['GitInfo']['pointfinder_db_commit'],
                         self.POINTFINDER_DEFAULT_COMMIT,
                         'invalid pointfinder commit')

        config.read(
            path.join(blast_database_repos.get_database_dir(),
                      'plasmidfinder-info.ini'))
        self.assertEqual(config['GitInfo']['plasmidfinder_db_commit'],
                         self.PLASMIDFINDER_DEFAULT_COMMIT,
                         'invalid plasmidfinder commit')

    def testRestoreDefault(self):
        # Build initial default database
        self.databases_manager.setup_default()

        # Build updated database
        blast_database_repos_git = self.databases_manager.get_database_repos(
            force_use_git=True)
        blast_database_repos_git.build({
            'resfinder':
            self.RESFINDER_DEFAULT_COMMIT,
            'pointfinder':
            self.POINTFINDER_DEFAULT_COMMIT,
            'plasmidfinder':
            self.PLASMIDFINDER_DEFAULT_COMMIT
        })

        # Verify that updated database is the one that gets returned by get_database_handler()
        blast_database_repos = self.databases_manager.get_database_repos()
        self.assertFalse(blast_database_repos.is_dist(), 'Invalid is_dist')
        self.assertEqual(blast_database_repos.get_database_dir(),
                         path.join(self.databases_dir.name, 'update'),
                         'Invalid database directory')
        self.assertTrue(
            path.exists(
                path.join(blast_database_repos.get_repo_dir('resfinder'),
                          '.git')),
            'Not using git version (updated version) of resfinder database')
        self.assertTrue(
            path.exists(
                path.join(blast_database_repos.get_repo_dir('pointfinder'),
                          '.git')),
            'Not using git version (updated version) of pointfinder database')
        self.assertTrue(
            path.exists(
                path.join(blast_database_repos.get_repo_dir('plasmidfinder'),
                          '.git')),
            'Not using git version (updated version) of plasmidfinder database'
        )

        # Restore default database
        self.databases_manager.restore_default()

        # Verify that default database (git stripped version) is the one that gets returned by get_database_handler()
        blast_database_repos = self.databases_manager.get_database_repos()
        self.assertTrue(blast_database_repos.is_dist(), 'Invalid is_dist')
        self.assertEqual(blast_database_repos.get_database_dir(),
                         path.join(self.databases_dir.name, 'dist'),
                         'Invalid database directory')
        self.assertFalse(
            path.exists(
                path.join(blast_database_repos.get_repo_dir('resfinder'),
                          '.git')), 'resfinder .git directory was not removed')
        self.assertFalse(
            path.exists(
                path.join(blast_database_repos.get_repo_dir('pointfinder'),
                          '.git')),
            'pointfinder .git directory was not removed')
        self.assertFalse(
            path.exists(
                path.join(blast_database_repos.get_repo_dir('plasmidfinder'),
                          '.git')),
            'plasmidfinder .git directory was not removed')

    def testIsHandlerDefaultCommitsTrue(self):
        # Setup default database
        self.databases_manager.setup_default()

        blast_database_repos = self.databases_manager.get_database_repos()

        self.assertTrue(
            AMRDatabasesManager.is_database_repos_default_commits(
                blast_database_repos), "Database is not default")

    def testIsHandlerDefaultCommitsFalse(self):
        # Setup database
        blast_database_repos = self.databases_manager.get_database_repos(
            force_use_git=True)
        blast_database_repos.update(
            {'resfinder': 'dc33e2f9ec2c420f99f77c5c33ae3faa79c999f2'})

        self.assertFalse(
            AMRDatabasesManager.is_database_repos_default_commits(
                blast_database_repos), "Database is default")
Exemplo n.º 15
0
    def _setup_args(self, arg_parser):
        name = self._script_name
        epilog = (
            "Example:\n"
            "\t" + name + " search -o out *.fasta\n"
            "\t\tSearches the files *.fasta for AMR genes using only the ResFinder database, storing results in the out/ directory.\n\n"
            + "\t" + name +
            " search --pointfinder-organism salmonella --output-excel results.xlsx *.fasta\n"
            +
            "\t\tSearches *.fasta for AMR genes using ResFinder and PointFinder database with the passed organism, storing results in results.xlsx."
        )

        arg_parser = self._subparser.add_parser(
            'search',
            epilog=epilog,
            formatter_class=argparse.RawTextHelpFormatter,
            help='Search for AMR genes')

        self._default_database_dir = AMRDatabasesManager.get_default_database_directory(
        )
        cpu_count = multiprocessing.cpu_count()

        arg_parser.add_argument(
            '--pointfinder-organism',
            action='store',
            dest='pointfinder_organism',
            type=str,
            help='The organism to use for pointfinder {' +
            ', '.join(PointfinderBlastDatabase.get_available_organisms()) +
            '}. Defaults to disabling search for point mutations. [None].',
            default=None,
            required=False)
        arg_parser.add_argument(
            '-d',
            '--database',
            action='store',
            dest='database',
            type=str,
            help=
            'The directory containing the resfinder/pointfinder databases [' +
            self._default_database_dir + '].',
            default=self._default_database_dir,
            required=False)
        arg_parser.add_argument(
            '-n',
            '--nprocs',
            action='store',
            dest='nprocs',
            type=int,
            help='The number of processing cores to use [' + str(cpu_count) +
            '].',
            default=cpu_count,
            required=False)

        threshold_group = arg_parser.add_argument_group('BLAST Thresholds')
        threshold_group.add_argument(
            '--pid-threshold',
            action='store',
            dest='pid_threshold',
            type=float,
            help='The percent identity threshold [98.0].',
            default=98.0,
            required=False)
        threshold_group.add_argument(
            '--percent-length-overlap-resfinder',
            action='store',
            dest='plength_threshold_resfinder',
            type=float,
            help='The percent length overlap for resfinder results [60.0].',
            default=60.0,
            required=False)
        threshold_group.add_argument(
            '--percent-length-overlap-pointfinder',
            action='store',
            dest='plength_threshold_pointfinder',
            type=float,
            help='The percent length overlap for pointfinder results [95.0].',
            default=95.0,
            required=False)

        report_group = arg_parser.add_argument_group('Reporting options')
        report_group.add_argument(
            '--no-exclude-genes',
            action='store_true',
            dest='no_exclude_genes',
            help=
            'Disable the default exclusion of some genes from ResFinder/PointFinder [False].',
            required=False)
        report_group.add_argument(
            '--exclude-genes-file',
            action='store',
            dest='exclude_genes_file',
            help=
            'A containing a list of ResFinder/PointFinder gene names to exclude from results [{}].'
            .format(ExcludeGenesList.get_default_exclude_file()),
            default=ExcludeGenesList.get_default_exclude_file(),
            required=False)
        report_group.add_argument(
            '--exclude-negatives',
            action='store_true',
            dest='exclude_negatives',
            help=
            'Exclude negative results (those sensitive to antimicrobials) [False].',
            required=False)
        report_group.add_argument(
            '--exclude-resistance-phenotypes',
            action='store_true',
            dest='exclude_resistance_phenotypes',
            help='Exclude predicted antimicrobial resistances [False].',
            required=False)
        report_group.add_argument(
            '--report-all-blast',
            action='store_true',
            dest='report_all_blast',
            help='Report all blast hits (vs. only top blast hits) [False].',
            required=False)

        output_group = arg_parser.add_argument_group(
            title='Output',
            description=
            'Use either --output-dir or specify individual output files')
        output_group.add_argument(
            '-o',
            '--output-dir',
            action='store',
            dest='output_dir',
            type=str,
            help="The output directory for results [None].",
            default=None,
            required=False)
        output_group.add_argument(
            '--output-summary',
            action='store',
            dest='output_summary',
            type=str,
            help=
            "The name of the output file containing the summary results. Not be be used with '--output-dir'. [None]",
            default=None,
            required=False)
        output_group.add_argument(
            '--output-resfinder',
            action='store',
            dest='output_resfinder',
            type=str,
            help=
            "The name of the output file containing the resfinder results. Not be be used with '--output-dir'. [None]",
            default=None,
            required=False)
        output_group.add_argument(
            '--output-pointfinder',
            action='store',
            dest='output_pointfinder',
            type=str,
            help=
            "The name of the output file containing the pointfinder results. Not be be used with '--output-dir'. [None]",
            default=None,
            required=False)
        output_group.add_argument(
            '--output-settings',
            action='store',
            dest='output_settings',
            type=str,
            help=
            "The name of the output file containing the settings. Not be be used with '--output-dir'. [None]",
            default=None,
            required=False)
        output_group.add_argument(
            '--output-excel',
            action='store',
            dest='output_excel',
            type=str,
            help=
            "The name of the output file containing the excel results. Not be be used with '--output-dir'. [None]",
            default=None,
            required=False)
        output_group.add_argument(
            '--output-hits-dir',
            action='store',
            dest='hits_output_dir',
            type=str,
            help=
            "The name of the directory to contain the BLAST hit files. Not be be used with '--output-dir'. [None]",
            default=None,
            required=False)

        arg_parser.add_argument('files', nargs='+')

        return arg_parser
Exemplo n.º 16
0
    def _setup_args(self, arg_parser):
        """Create the ``search`` sub-command parser and declare its options.

        As a side effect the default database directory is stored on
        ``self._default_database_dir``.

        :param arg_parser: Not read; the name is rebound to the parser that is
            created for the ``search`` sub-command.
        :return: The argument parser for the ``search`` sub-command.
        """
        epilog = (
            "Example:\n"
            "\t{name} search -o out *.fasta\n"
            "\t\tSearches the files *.fasta for AMR genes using only the ResFinder database, storing results in the out/ directory.\n\n"
            "\t{name} search --pointfinder-organism salmonella --output-excel results.xlsx *.fasta\n"
            "\t\tSearches *.fasta for AMR genes using ResFinder and PointFinder database with the passed organism, storing results in results.xlsx."
        ).format(name=self._script_name)

        # RawTextHelpFormatter keeps the tabs/newlines of the epilog intact.
        arg_parser = self._subparser.add_parser(
            'search',
            epilog=epilog,
            formatter_class=argparse.RawTextHelpFormatter,
            help='Search for AMR genes')

        # Kept on the instance: run() compares the chosen database directory
        # against this value.
        self._default_database_dir = AMRDatabasesManager.get_default_database_directory(
        )
        cpu_count = multiprocessing.cpu_count()
        default_exclude_file = ExcludeGenesList.get_default_exclude_file()

        # NOTE: action='store' and required=False are argparse's defaults for
        # optional arguments, so they are not spelled out below.
        arg_parser.add_argument(
            '--pointfinder-organism',
            dest='pointfinder_organism',
            type=str,
            help='The organism to use for pointfinder {' +
            ', '.join(PointfinderBlastDatabase.get_available_organisms()) +
            '}. Defaults to disabling search for point mutations. [None].',
            default=None)
        arg_parser.add_argument(
            '--plasmidfinder-database-type',
            dest='plasmidfinder_database_type',
            type=str,
            help='The database type to use for plasmidfinder {' +
            ', '.join(PlasmidfinderBlastDatabase.get_available_databases()) +
            '}. Defaults to using all available database types to search for plasmids. [None].',
            default=None)
        arg_parser.add_argument(
            '-d',
            '--database',
            dest='database',
            type=str,
            help=
            'The directory containing the resfinder/pointfinder/plasmidfinder databases ['
            + self._default_database_dir + '].',
            default=self._default_database_dir)
        arg_parser.add_argument(
            '-n',
            '--nprocs',
            dest='nprocs',
            type=int,
            help='The number of processing cores to use [' + str(cpu_count) +
            '].',
            default=cpu_count)
        # NOTE(review): the dest is 'ignore_valid_files' (not 'ignore_invalid_files');
        # it is left untouched because run() reads args.ignore_valid_files.
        arg_parser.add_argument('--ignore-invalid-files',
                                action='store_true',
                                dest='ignore_valid_files',
                                help='Skips over invalid input files')
        arg_parser.add_argument(
            '--mlst-scheme',
            dest='mlst_scheme',
            help=
            'Specify scheme name, visit https://github.com/tseemann/mlst/blob/master/db/scheme_species_map.tab for supported scheme genus available. [None] '
        )

        # Quality-metric options.
        arg_parser.add_argument(
            '--genome-size-lower-bound',
            dest='genome_size_lower_bound',
            type=int,
            help=
            'The lower bound for our genome size for the quality metrics. Defaults to 4 Mbp. [4000000].',
            default=4000000)
        arg_parser.add_argument(
            '--genome-size-upper-bound',
            dest='genome_size_upper_bound',
            type=int,
            help=
            'The upper bound for our genome size for the quality metrics. Defaults to 6 Mbp. [6000000].',
            default=6000000)
        arg_parser.add_argument(
            '--minimum-N50-value',
            dest='minimum_N50_value',
            type=int,
            help=
            'The minimum N50 value for the quality metrics. Defaults to 10000. [10000].',
            default=10000)
        arg_parser.add_argument(
            '--minimum-contig-length',
            dest='minimum_contig_length',
            type=int,
            help=
            'The minimum contig length for the quality metrics. Defaults to 300 bp. [300].',
            default=300)
        arg_parser.add_argument(
            '--unacceptable-number-contigs',
            dest='unacceptable_num_contigs',
            type=int,
            help=
            'The minimum, unacceptable number of contigs which are equal to or above the minimum contig length for our quality metrics. Defaults to 1000. [1000].',
            default=1000)

        threshold_group = arg_parser.add_argument_group('BLAST Thresholds')
        threshold_group.add_argument(
            '--pid-threshold',
            dest='pid_threshold',
            type=float,
            help='The percent identity threshold [98.0].',
            default=98.0)
        threshold_group.add_argument(
            '--percent-length-overlap-resfinder',
            dest='plength_threshold_resfinder',
            type=float,
            help='The percent length overlap for resfinder results [60.0].',
            default=60.0)
        threshold_group.add_argument(
            '--percent-length-overlap-pointfinder',
            dest='plength_threshold_pointfinder',
            type=float,
            help='The percent length overlap for pointfinder results [95.0].',
            default=95.0)
        # The help text previously (wrongly) described this as a resfinder threshold.
        threshold_group.add_argument(
            '--percent-length-overlap-plasmidfinder',
            dest='plength_threshold_plasmidfinder',
            type=float,
            help='The percent length overlap for plasmidfinder results [60.0].',
            default=60.0)

        report_group = arg_parser.add_argument_group('Reporting options')
        report_group.add_argument(
            '--no-exclude-genes',
            action='store_true',
            dest='no_exclude_genes',
            help=
            'Disable the default exclusion of some genes from ResFinder/PointFinder/PlasmidFinder [False].'
        )
        report_group.add_argument(
            '--exclude-genes-file',
            dest='exclude_genes_file',
            help=
            'A file containing a list of ResFinder/PointFinder/PlasmidFinder gene names to exclude from results [{}].'
            .format(default_exclude_file),
            default=default_exclude_file)
        report_group.add_argument(
            '--exclude-negatives',
            action='store_true',
            dest='exclude_negatives',
            help=
            'Exclude negative results (those sensitive to antimicrobials) [False].'
        )
        report_group.add_argument(
            '--exclude-resistance-phenotypes',
            action='store_true',
            dest='exclude_resistance_phenotypes',
            help='Exclude predicted antimicrobial resistances [False].')
        report_group.add_argument(
            '--report-all-blast',
            action='store_true',
            dest='report_all_blast',
            help='Report all blast hits (vs. only top blast hits) [False].')

        output_group = arg_parser.add_argument_group(
            title='Output',
            description=
            'Use either --output-dir or specify individual output files')
        output_group.add_argument(
            '-o',
            '--output-dir',
            dest='output_dir',
            type=str,
            help="The output directory for results [None].",
            default=None)

        # The individual output options only differ in flag, dest and the thing
        # they name; they are registered in the same order as before so the
        # generated --help listing keeps its layout.
        individual_outputs = (
            ('--output-summary', 'output_summary',
             'output file containing the summary results'),
            ('--output-detailed-summary', 'output_detailed_summary',
             'output file containing the detailed summary results'),
            ('--output-resfinder', 'output_resfinder',
             'output file containing the resfinder results'),
            ('--output-pointfinder', 'output_pointfinder',
             'output file containing the pointfinder results'),
            ('--output-plasmidfinder', 'output_plasmidfinder',
             'output file containing the plasmidfinder results'),
            ('--output-settings', 'output_settings',
             'output file containing the settings'),
            ('--output-excel', 'output_excel',
             'output file containing the excel results'),
            ('--output-hits-dir', 'hits_output_dir',
             'directory to contain the BLAST hit files'),
            ('--output-mlst', 'output_mlst',
             'output file containing the mlst results'),
        )
        for flag, dest, description in individual_outputs:
            output_group.add_argument(
                flag,
                dest=dest,
                type=str,
                help=
                "The name of the {}. Not to be used with '--output-dir'. [None]"
                .format(description),
                default=None)

        arg_parser.add_argument('files', nargs='+')

        return arg_parser
 def setUp(self):
     """Create a temporary directory and a databases manager configured with it."""
     scratch_dir = tempfile.TemporaryDirectory()
     self.databases_dir = scratch_dir
     self.databases_manager = AMRDatabasesManager(
         sub_dirs=True, database_dir=scratch_dir.name)
Exemplo n.º 18
0
    def run(self, args):
        """Validate the parsed ``search`` arguments, run the search and write the requested outputs.

        :param args: The parsed command-line arguments of the ``search`` sub-command.
        :raises CommandParseException: If no input file is given or one does not
            exist, if the database directory or the exclude-genes file is missing,
            if an unsupported PointFinder organism or PlasmidFinder database type
            is requested, or if the output options are missing, conflict with
            ``--output-dir`` or point to an already existing output directory.
        """
        super(Search, self).run(args)

        if not args.files:
            raise CommandParseException("Must pass a fasta file to process",
                                        self._root_arg_parser,
                                        print_help=True)

        for file in args.files:
            if not path.exists(file):
                raise CommandParseException(
                    'File [' + file + '] does not exist',
                    self._root_arg_parser)

        if not path.isdir(args.database):
            if args.database == self._default_database_dir:
                raise CommandParseException(
                    "Default database does not exist. Perhaps try restoring with 'staramr db restore-default'",
                    self._root_arg_parser)
            else:
                # A space was missing between "with" and the quoted command.
                raise CommandParseException(
                    "Database directory [" + args.database +
                    "] does not exist. Perhaps try building with " +
                    "'staramr db build --dir " + args.database + "'",
                    self._root_arg_parser)

        if args.database == AMRDatabasesManager.get_default_database_directory(
        ):
            database_repos = AMRDatabasesManager.create_default_manager(
            ).get_database_repos()
        else:
            database_repos = AMRDatabasesManager(
                args.database).get_database_repos()

        if not AMRDatabasesManager.is_database_repos_default_commits(
                database_repos):
            logger.warning(
                "Using non-default ResFinder/PointFinder. This may lead to differences in the detected "
                +
                "AMR genes depending on how the database files are structured."
            )

        resfinder_database = database_repos.build_blast_database('resfinder')

        # PointFinder is only searched when an organism is given.
        if args.pointfinder_organism:
            available_organisms = PointfinderBlastDatabase.get_available_organisms(
            )
            if args.pointfinder_organism not in available_organisms:
                raise CommandParseException(
                    "The only Pointfinder organism(s) currently supported are "
                    + str(available_organisms), self._root_arg_parser)
            pointfinder_database = database_repos.build_blast_database(
                'pointfinder', {'organism': args.pointfinder_organism})
        else:
            logger.info(
                "No --pointfinder-organism specified. Will not search the PointFinder databases"
            )
            pointfinder_database = None

        # PlasmidFinder is always searched; the type only narrows the database.
        if args.plasmidfinder_database_type:
            available_databases = PlasmidfinderBlastDatabase.get_available_databases(
            )
            if args.plasmidfinder_database_type not in available_databases:
                raise CommandParseException(
                    "The only Plasmidfinder databases that are currently supported are "
                    + str(available_databases), self._root_arg_parser)
            plasmidfinder_database = database_repos.build_blast_database(
                'plasmidfinder',
                {'database_type': args.plasmidfinder_database_type})
        else:
            logger.info(
                "No --plasmidfinder-database-type specified. Will search the entire PlasmidFinder database"
            )
            plasmidfinder_database = database_repos.build_blast_database(
                'plasmidfinder')

        hits_output_dir = None
        output_summary = None
        output_detailed_summary = None
        output_resfinder = None
        output_pointfinder = None
        output_plasmidfinder = None
        output_mlst = None
        output_excel = None
        output_settings = None
        if args.output_dir:
            # Every individual --output-[type] option is mutually exclusive with
            # --output-dir. --output-mlst and --output-settings used to be missing
            # from this check and were silently ignored.
            individual_outputs = (args.output_summary,
                                  args.output_detailed_summary,
                                  args.output_resfinder,
                                  args.output_pointfinder,
                                  args.output_plasmidfinder, args.output_mlst,
                                  args.output_settings, args.output_excel,
                                  args.hits_output_dir)
            if path.exists(args.output_dir):
                raise CommandParseException(
                    "Output directory [" + args.output_dir +
                    "] already exists", self._root_arg_parser)
            elif any(individual_outputs):
                raise CommandParseException(
                    'You cannot use --output-[type] with --output-dir',
                    self._root_arg_parser)
            else:
                mkdir(args.output_dir)

                hits_output_dir = path.join(args.output_dir, 'hits')
                output_resfinder = path.join(args.output_dir, "resfinder.tsv")
                output_pointfinder = path.join(args.output_dir,
                                               "pointfinder.tsv")
                output_plasmidfinder = path.join(args.output_dir,
                                                 "plasmidfinder.tsv")
                output_summary = path.join(args.output_dir, "summary.tsv")
                output_detailed_summary = path.join(args.output_dir,
                                                    "detailed_summary.tsv")
                output_mlst = path.join(args.output_dir, "mlst.tsv")
                output_settings = path.join(args.output_dir, "settings.txt")
                output_excel = path.join(args.output_dir, 'results.xlsx')

                mkdir(hits_output_dir)

                logger.info(
                    "--output-dir set. All files will be output to [%s]",
                    args.output_dir)
        elif args.output_summary or args.output_excel or args.output_detailed_summary:
            logger.info(
                '--output-dir not set. Files will be output to the respective --output-[type] setting'
            )
            output_resfinder = args.output_resfinder
            output_pointfinder = args.output_pointfinder
            output_plasmidfinder = args.output_plasmidfinder
            output_summary = args.output_summary
            output_detailed_summary = args.output_detailed_summary
            output_mlst = args.output_mlst
            output_settings = args.output_settings
            output_excel = args.output_excel
            hits_output_dir = args.hits_output_dir

            if hits_output_dir is not None:
                if path.exists(
                        hits_output_dir) and not path.isdir(hits_output_dir):
                    raise CommandParseException(
                        "--output-hits-dir [" + hits_output_dir +
                        "] exists and is not a directory",
                        self._root_arg_parser)
                elif path.exists(hits_output_dir):
                    logger.debug(
                        "Found --output-hits-dir [%s] and is a directory. Will write hits here",
                        hits_output_dir)
                else:
                    logger.debug("Making directory [%s]", hits_output_dir)
                    mkdir(hits_output_dir)
        else:
            raise CommandParseException(
                'You must set one of --output-dir, --output-summary, --output-detailed-summary, or --output-excel',
                self._root_arg_parser)

        if args.no_exclude_genes:
            logger.info(
                "--no-exclude-genes enabled. Will not exclude any ResFinder/PointFinder genes."
            )
            exclude_genes = []
        else:
            if not path.exists(args.exclude_genes_file):
                raise CommandParseException(
                    '--exclude-genes-file [{}] does not exist'.format(
                        args.exclude_genes_file), self._root_arg_parser)
            else:
                logger.info(
                    "Will exclude ResFinder/PointFinder genes listed in [%s]. Use --no-exclude-genes to disable",
                    args.exclude_genes_file)
                exclude_genes = ExcludeGenesList(
                    args.exclude_genes_file).tolist()

        results = self._generate_results(
            database_repos=database_repos,
            resfinder_database=resfinder_database,
            pointfinder_database=pointfinder_database,
            plasmidfinder_database=plasmidfinder_database,
            nprocs=args.nprocs,
            include_negatives=not args.exclude_negatives,
            include_resistances=not args.exclude_resistance_phenotypes,
            hits_output=hits_output_dir,
            pid_threshold=args.pid_threshold,
            plength_threshold_resfinder=args.plength_threshold_resfinder,
            plength_threshold_pointfinder=args.plength_threshold_pointfinder,
            plength_threshold_plasmidfinder=args.
            plength_threshold_plasmidfinder,
            report_all_blast=args.report_all_blast,
            genes_to_exclude=exclude_genes,
            files=args.files,
            # The attribute name (ignore_valid_files) is the dest declared for
            # --ignore-invalid-files.
            ignore_invalid_files=args.ignore_valid_files,
            mlst_scheme=args.mlst_scheme,
            genome_size_lower_bound=args.genome_size_lower_bound,
            genome_size_upper_bound=args.genome_size_upper_bound,
            minimum_N50_value=args.minimum_N50_value,
            minimum_contig_length=args.minimum_contig_length,
            unacceptable_num_contigs=args.unacceptable_num_contigs)
        amr_detection = results['results']
        settings = results['settings']

        # Tab-separated outputs, written in this order. Each entry is:
        # (label when writing, option name, label when skipping, path, getter).
        # The getters are wrapped in lambdas so a result table is only requested
        # when its file is actually written. The pointfinder table is only
        # written when a PointFinder organism was given.
        tabular_outputs = (
            ('resfinder', '--output-resfinder', 'resfinder', output_resfinder,
             lambda: amr_detection.get_resfinder_results()),
            ('pointfinder', '--output-pointfinder', 'pointfinder',
             output_pointfinder if args.pointfinder_organism else None,
             lambda: amr_detection.get_pointfinder_results()),
            ('plasmidfinder', '--output-plasmidfinder', 'plasmidfinder',
             output_plasmidfinder,
             lambda: amr_detection.get_plasmidfinder_results()),
            ('summary', '--output-summary', 'summary', output_summary,
             lambda: amr_detection.get_summary_results()),
            ('MLST summary', '--output-mlst', 'mlst', output_mlst,
             lambda: amr_detection.get_mlst_results()),
            ('detailed summary', '--output-detailed-summary',
             'detailed summary', output_detailed_summary,
             lambda: amr_detection.get_detailed_summary_results()),
        )
        for write_label, option, skip_label, output_path, get_table in tabular_outputs:
            if output_path:
                logger.info("Writing %s to [%s]", write_label, output_path)
                with open(output_path, 'w') as fh:
                    self._print_dataframe_to_text_file_handle(get_table(), fh)
            else:
                logger.info(
                    "--output-dir or %s unset. No %s file will be written",
                    option, skip_label)

        if output_settings:
            logger.info("Writing settings to [%s]", output_settings)
            self._print_settings_to_file(settings, output_settings)
        else:
            logger.info(
                "--output-dir or --output-settings unset. No settings file will be written"
            )

        if output_excel:
            logger.info("Writing Excel to [%s]", output_excel)
            settings_dataframe = pd.DataFrame.from_dict(settings,
                                                        orient='index')
            settings_dataframe.index.name = 'Key'
            # Plain assignment instead of set_axis(..., inplace=True): the
            # 'inplace' argument was deprecated in pandas 1.5 and removed in 2.0.
            settings_dataframe.columns = ['Value']

            self._print_dataframes_to_excel(
                output_excel, amr_detection.get_summary_results(),
                amr_detection.get_resfinder_results(),
                amr_detection.get_pointfinder_results(),
                amr_detection.get_plasmidfinder_results(),
                amr_detection.get_detailed_summary_results(),
                amr_detection.get_mlst_results(), settings_dataframe,
                args.minimum_contig_length)
        else:
            logger.info(
                "--output-dir or --output-excel unset. No excel file will be written"
            )

        if hits_output_dir:
            logger.info("BLAST hits are stored in [%s]", hits_output_dir)
        else:
            logger.info(
                "--output-dir or --output-hits-dir not set. No BLAST hits will be saved."
            )