def setUp(self):
    """Create the databases, BLAST job handler and AMR detector used by the tests.

    Temporary directories are created for the BLAST output and for the
    detection output; PointFinder is left disabled (``None``).
    """
    repos = AMRDatabasesManager.create_default_manager().get_database_repos()

    # Locations of the individual database repositories.
    self.resfinder_dir = repos.get_repo_dir('resfinder')
    self.pointfinder_dir = repos.get_repo_dir('pointfinder')
    self.plasmidfinder_dir = repos.get_repo_dir('plasmidfinder')

    # Database objects and drug tables.
    self.resfinder_database = ResfinderBlastDatabase(self.resfinder_dir)
    self.resfinder_drug_table = ARGDrugTableResfinder()
    self.pointfinder_drug_table = ARGDrugTablePointfinder()
    self.plasmidfinder_database = PlasmidfinderBlastDatabase(
        self.plasmidfinder_dir)
    self.pointfinder_database = None

    # BLAST job handler writing into its own temporary directory.
    self.blast_out = tempfile.TemporaryDirectory()
    databases_by_name = {
        'resfinder': self.resfinder_database,
        'pointfinder': self.pointfinder_database,
        'plasmidfinder': self.plasmidfinder_database,
    }
    self.blast_handler = JobHandler(databases_by_name, 2, self.blast_out.name)

    # Detector under test, with a separate temporary output directory.
    self.outdir = tempfile.TemporaryDirectory()
    self.amr_detection = AMRDetectionResistance(
        self.resfinder_database,
        self.resfinder_drug_table,
        self.blast_handler,
        self.pointfinder_drug_table,
        self.pointfinder_database,
        output_dir=self.outdir.name)

    this_dir = path.dirname(__file__)
    self.test_data_dir = path.join(this_dir, '..', 'data')
def run(self, args):
    """Build the ResFinder/PointFinder databases into ``args.destination``.

    :param args: Parsed command-line arguments. Reads ``destination``,
        ``resfinder_commit`` and ``pointfinder_commit``.
    :raises CommandParseException: If ``args.destination`` already exists.
    """
    super(Build, self).run(args)

    destination = args.destination

    # Never build on top of an existing path; only the default-directory case prints help.
    if path.exists(destination):
        if destination == self._default_dir:
            raise CommandParseException(
                "Error, default destination [" + destination + "] already exists",
                self._root_arg_parser,
                print_help=True)
        raise CommandParseException(
            "Error, destination [" + destination + "] already exists",
            self._root_arg_parser)

    mkdir(destination)

    # The default directory gets the default manager; anything else gets its own manager.
    if destination == AMRDatabasesManager.get_default_database_directory():
        manager = AMRDatabasesManager.create_default_manager()
    else:
        manager = AMRDatabasesManager(destination)
    database_repos = manager.get_database_repos()

    requested_commits = {
        'resfinder': args.resfinder_commit,
        'pointfinder': args.pointfinder_commit,
    }
    database_repos.build(requested_commits)

    if AMRDatabasesManager.is_database_repos_default_commits(database_repos):
        return

    logger.warning(
        "Built non-default ResFinder/PointFinder database version. This may lead to "
        "differences in the detected AMR genes depending on how the database files are structured.")
def run(self, args):
    """Update the default database or each directory in ``args.directories``.

    :param args: Parsed command-line arguments. Reads ``directories``,
        ``update_default`` and the three ``*_commit`` attributes.
    :raises CommandParseException: If no directory is given and
        ``--update-default`` is not set.
    :raises DatabaseErrorException: Re-raised after logging when updating the
        default database fails.
    """
    super(Update, self).run(args)

    def requested_commits():
        # A fresh dict per call, exactly as each update call received before.
        return {
            'resfinder': args.resfinder_commit,
            'pointfinder': args.pointfinder_commit,
            'plasmidfinder': args.plasmidfinder_commit,
        }

    def is_default(repos):
        return AMRDatabasesManager.is_database_repos_default_commits(repos)

    if len(args.directories) != 0:
        # Explicit directories: update each one independently.
        for directory in args.directories:
            database_repos = AMRDatabasesManager(directory).get_database_repos()
            database_repos.update(requested_commits())

            if not is_default(database_repos):
                logger.warning(
                    "Updated to non-default ResFinder/PointFinder/PlasmidFinder database version [%s]. This may lead to "
                    "differences in the detected AMR genes depending on how the database files are structured.",
                    directory)
        return

    if not args.update_default:
        raise CommandParseException(
            "Must pass at least one directory to update, or use '--update-default'",
            self._root_arg_parser,
            print_help=True)

    # No directories given and --update-default set: update the default database.
    try:
        default_manager = AMRDatabasesManager.create_default_manager()
        database_repos = default_manager.get_database_repos(force_use_git=True)
        database_repos.update(requested_commits())

        if not is_default(database_repos):
            logger.warning(
                "Updated to non-default ResFinder/PointFinder/PlasmidFinder database version. This may lead to "
                "differences in the detected AMR genes depending on how the database files are structured.")
    except DatabaseErrorException as e:
        logger.error(
            "Could not update default database. Please try restoring with 'staramr db restore-default'")
        raise e
def run(self, args):
    """Restore the default database, asking for confirmation unless forced.

    :param args: Parsed command-line arguments. When ``force`` is truthy the
        confirmation prompt is skipped.
    """
    super(RestoreDefault, self).run(args)

    database_manager = AMRDatabasesManager.create_default_manager()

    # Short-circuit: the prompt is only shown when --force was not given.
    if args.force or self._confirm_restore():
        database_manager.restore_default()
def run(self, args):
    """Print database information for the default database or given directories.

    With no ``args.directories`` the default database is reported; otherwise
    each directory is reported in turn. Resistance-table information from
    ``ARGDrugTable`` is merged into each report before it is written to stdout.

    A ``DatabaseNotFoundException`` is logged as an error and not re-raised,
    so one missing directory does not stop the remaining ones.

    :param args: Parsed command-line arguments. Reads ``directories``.
    """
    super(Info, self).run(args)

    arg_drug_table = ARGDrugTable()

    if len(args.directories) == 0:
        # The repo lookup and commit check sit inside the try, matching the
        # per-directory branch below, so a missing default database always
        # yields the restore hint rather than an unhandled exception.
        # NOTE(review): assumes these calls can raise DatabaseNotFoundException,
        # as the per-directory branch implies - confirm against the manager.
        try:
            database_repos = AMRDatabasesManager.create_default_manager().get_database_repos()

            if not AMRDatabasesManager.is_database_repos_default_commits(database_repos):
                logger.warning(
                    "Using non-default ResFinder/PointFinder database versions. This may lead to differences in the detected "
                    "AMR genes depending on how the database files are structured.")

            database_info = database_repos.info()
            database_info.update(arg_drug_table.get_resistance_table_info())
            sys.stdout.write(get_string_with_spacing(database_info))
        except DatabaseNotFoundException:
            logger.error(
                "No database found. Perhaps try restoring the default with 'staramr db restore-default'")
    else:
        for directory in args.directories:
            try:
                database_repos = AMRDatabasesManager(directory).get_database_repos()

                if not AMRDatabasesManager.is_database_repos_default_commits(database_repos):
                    logger.warning(
                        "Using non-default ResFinder/PointFinder database version [%s]. This may lead to "
                        "differences in the detected AMR genes depending on how the database files are structured.",
                        directory)

                database_info = database_repos.info()
                database_info.update(arg_drug_table.get_resistance_table_info())
                sys.stdout.write(get_string_with_spacing(database_info))
            except DatabaseNotFoundException:
                logger.error(
                    "Database not found in [%s]. Perhaps try building with 'staramr db build --dir %s'",
                    directory, directory)
def run(self, args):
    """Validate the arguments, run the AMR search and write the requested outputs.

    Steps, in order:

    1. Check that input files and the database directory exist.
    2. Build the ResFinder, optional PointFinder and PlasmidFinder BLAST databases.
    3. Resolve output locations from ``--output-dir`` or the individual
       ``--output-[type]`` options.
    4. Load the list of genes to exclude, unless disabled.
    5. Run ``self._generate_results`` and write each configured output.

    :param args: Parsed command-line arguments for the search sub-command.
    :raises CommandParseException: If an input file, the database directory or
        the exclude-genes file is missing, an unsupported PointFinder organism
        or PlasmidFinder database type is requested, or the output options are
        missing or conflicting.
    """
    super(Search, self).run(args)

    # ---- input validation ------------------------------------------------
    if len(args.files) == 0:
        raise CommandParseException(
            "Must pass a fasta file to process", self._root_arg_parser, print_help=True)

    for input_file in args.files:
        if not path.exists(input_file):
            raise CommandParseException(
                'File [' + input_file + '] does not exist', self._root_arg_parser)

    if not path.isdir(args.database):
        if args.database == self._default_database_dir:
            raise CommandParseException(
                "Default database does not exist. Perhaps try restoring with 'staramr db restore-default'",
                self._root_arg_parser)
        # Trailing space after "with" added: the two fragments previously ran together.
        raise CommandParseException(
            "Database directory [" + args.database + "] does not exist. Perhaps try building with "
            + "'staramr db build --dir " + args.database + "'",
            self._root_arg_parser)

    # ---- databases -------------------------------------------------------
    if args.database == AMRDatabasesManager.get_default_database_directory():
        database_repos = AMRDatabasesManager.create_default_manager().get_database_repos()
    else:
        database_repos = AMRDatabasesManager(args.database).get_database_repos()

    if not AMRDatabasesManager.is_database_repos_default_commits(database_repos):
        logger.warning(
            "Using non-default ResFinder/PointFinder. This may lead to differences in the detected "
            "AMR genes depending on how the database files are structured.")

    resfinder_database = database_repos.build_blast_database('resfinder')

    if args.pointfinder_organism:
        available_organisms = PointfinderBlastDatabase.get_available_organisms()
        if args.pointfinder_organism not in available_organisms:
            raise CommandParseException(
                "The only Pointfinder organism(s) currently supported are " + str(available_organisms),
                self._root_arg_parser)
        pointfinder_database = database_repos.build_blast_database(
            'pointfinder', {'organism': args.pointfinder_organism})
    else:
        logger.info(
            "No --pointfinder-organism specified. Will not search the PointFinder databases")
        pointfinder_database = None

    if args.plasmidfinder_database_type:
        available_plasmid_databases = PlasmidfinderBlastDatabase.get_available_databases()
        if args.plasmidfinder_database_type not in available_plasmid_databases:
            raise CommandParseException(
                "The only Plasmidfinder databases that are currently supported are "
                + str(available_plasmid_databases),
                self._root_arg_parser)
        plasmidfinder_database = database_repos.build_blast_database(
            'plasmidfinder', {'database_type': args.plasmidfinder_database_type})
    else:
        logger.info(
            "No --plasmidfinder-database-type specified. Will search the entire PlasmidFinder database")
        plasmidfinder_database = database_repos.build_blast_database('plasmidfinder')

    # ---- output locations ------------------------------------------------
    # Every branch below either assigns all output variables or raises.
    if args.output_dir:
        if path.exists(args.output_dir):
            raise CommandParseException(
                "Output directory [" + args.output_dir + "] already exists",
                self._root_arg_parser)
        elif (args.output_summary or args.output_detailed_summary or args.output_resfinder
              or args.output_pointfinder or args.output_plasmidfinder or args.output_excel
              or args.hits_output_dir):
            raise CommandParseException(
                'You cannot use --output-[type] with --output-dir', self._root_arg_parser)
        else:
            mkdir(args.output_dir)

            hits_output_dir = path.join(args.output_dir, 'hits')
            output_resfinder = path.join(args.output_dir, "resfinder.tsv")
            output_pointfinder = path.join(args.output_dir, "pointfinder.tsv")
            output_plasmidfinder = path.join(args.output_dir, "plasmidfinder.tsv")
            output_summary = path.join(args.output_dir, "summary.tsv")
            output_detailed_summary = path.join(args.output_dir, "detailed_summary.tsv")
            output_mlst = path.join(args.output_dir, "mlst.tsv")
            output_settings = path.join(args.output_dir, "settings.txt")
            output_excel = path.join(args.output_dir, 'results.xlsx')

            mkdir(hits_output_dir)

            logger.info("--output-dir set. All files will be output to [%s]", args.output_dir)
    elif args.output_summary or args.output_excel or args.output_detailed_summary:
        logger.info(
            '--output-dir not set. Files will be output to the respective --output-[type] setting')
        output_resfinder = args.output_resfinder
        output_pointfinder = args.output_pointfinder
        output_plasmidfinder = args.output_plasmidfinder
        output_summary = args.output_summary
        output_detailed_summary = args.output_detailed_summary
        output_mlst = args.output_mlst
        output_settings = args.output_settings
        output_excel = args.output_excel
        hits_output_dir = args.hits_output_dir

        if hits_output_dir is not None:
            if path.exists(hits_output_dir) and not path.isdir(hits_output_dir):
                raise CommandParseException(
                    "--output-hits-dir [" + hits_output_dir + "] exists and is not a directory",
                    self._root_arg_parser)
            elif path.exists(hits_output_dir):
                logger.debug(
                    "Found --output-hits-dir [%s] and is a directory. Will write hits here",
                    hits_output_dir)
            else:
                logger.debug("Making directory [%s]", hits_output_dir)
                mkdir(hits_output_dir)
    else:
        raise CommandParseException(
            'You must set one of --output-dir, --output-summary, --output-detailed-summary, or --output-excel',
            self._root_arg_parser)

    # ---- genes to exclude ------------------------------------------------
    if args.no_exclude_genes:
        logger.info(
            "--no-exclude-genes enabled. Will not exclude any ResFinder/PointFinder genes.")
        exclude_genes = []
    else:
        if not path.exists(args.exclude_genes_file):
            raise CommandParseException(
                '--exclude-genes-file [{}] does not exist'.format(args.exclude_genes_file),
                self._root_arg_parser)
        logger.info(
            "Will exclude ResFinder/PointFinder genes listed in [%s]. Use --no-exclude-genes to disable",
            args.exclude_genes_file)
        exclude_genes = ExcludeGenesList(args.exclude_genes_file).tolist()

    # ---- run the search --------------------------------------------------
    results = self._generate_results(
        database_repos=database_repos,
        resfinder_database=resfinder_database,
        pointfinder_database=pointfinder_database,
        plasmidfinder_database=plasmidfinder_database,
        nprocs=args.nprocs,
        include_negatives=not args.exclude_negatives,
        include_resistances=not args.exclude_resistance_phenotypes,
        hits_output=hits_output_dir,
        pid_threshold=args.pid_threshold,
        plength_threshold_resfinder=args.plength_threshold_resfinder,
        plength_threshold_pointfinder=args.plength_threshold_pointfinder,
        plength_threshold_plasmidfinder=args.plength_threshold_plasmidfinder,
        report_all_blast=args.report_all_blast,
        genes_to_exclude=exclude_genes,
        files=args.files,
        # NOTE(review): attribute is spelled ``ignore_valid_files``; presumably the
        # argparse ``dest`` for --ignore-invalid-files - confirm before renaming.
        ignore_invalid_files=args.ignore_valid_files,
        mlst_scheme=args.mlst_scheme,
        genome_size_lower_bound=args.genome_size_lower_bound,
        genome_size_upper_bound=args.genome_size_upper_bound,
        minimum_N50_value=args.minimum_N50_value,
        minimum_contig_length=args.minimum_contig_length,
        unacceptable_num_contigs=args.unacceptable_num_contigs)
    amr_detection = results['results']
    settings = results['settings']

    # ---- write tabular outputs -------------------------------------------
    # (destination, result getter, "writing" message, "skipped" message), in the
    # order the files are reported. Getters are only called when a file is written.
    # The PointFinder table additionally requires an organism to have been selected.
    tabular_outputs = [
        (output_resfinder,
         amr_detection.get_resfinder_results,
         "Writing resfinder to [%s]",
         "--output-dir or --output-resfinder unset. No resfinder file will be written"),
        (output_pointfinder if args.pointfinder_organism else None,
         amr_detection.get_pointfinder_results,
         "Writing pointfinder to [%s]",
         "--output-dir or --output-pointfinder unset. No pointfinder file will be written"),
        (output_plasmidfinder,
         amr_detection.get_plasmidfinder_results,
         "Writing plasmidfinder to [%s]",
         "--output-dir or --output-plasmidfinder unset. No plasmidfinder file will be written"),
        (output_summary,
         amr_detection.get_summary_results,
         "Writing summary to [%s]",
         "--output-dir or --output-summary unset. No summary file will be written"),
        (output_mlst,
         amr_detection.get_mlst_results,
         "Writing MLST summary to [%s]",
         "--output-dir or --output-mlst unset. No mlst file will be written"),
        (output_detailed_summary,
         amr_detection.get_detailed_summary_results,
         "Writing detailed summary to [%s]",
         "--output-dir or --output-detailed-summary unset. No detailed summary file will be written"),
    ]
    for destination, get_results, writing_message, skipped_message in tabular_outputs:
        if destination:
            logger.info(writing_message, destination)
            with open(destination, 'w') as fh:
                self._print_dataframe_to_text_file_handle(get_results(), fh)
        else:
            logger.info(skipped_message)

    # ---- settings, Excel and BLAST hits ----------------------------------
    if output_settings:
        logger.info("Writing settings to [%s]", output_settings)
        self._print_settings_to_file(settings, output_settings)
    else:
        logger.info("--output-dir or --output-settings unset. No settings file will be written")

    if output_excel:
        logger.info("Writing Excel to [%s]", output_excel)
        settings_dataframe = pd.DataFrame.from_dict(settings, orient='index')
        settings_dataframe.index.name = 'Key'
        # Assign the column labels directly: ``set_axis(..., inplace=True)`` was
        # removed in pandas 2.0, while plain assignment works on every version.
        settings_dataframe.columns = ['Value']

        self._print_dataframes_to_excel(
            output_excel,
            amr_detection.get_summary_results(),
            amr_detection.get_resfinder_results(),
            amr_detection.get_pointfinder_results(),
            amr_detection.get_plasmidfinder_results(),
            amr_detection.get_detailed_summary_results(),
            amr_detection.get_mlst_results(),
            settings_dataframe,
            args.minimum_contig_length)
    else:
        logger.info("--output-dir or --output-excel unset. No excel file will be written")

    if hits_output_dir:
        logger.info("BLAST hits are stored in [%s]", hits_output_dir)
    else:
        logger.info("--output-dir or --output-hits-dir not set. No BLAST hits will be saved.")