def root(self, options):
    """Re-root a tree on the genomes assigned to an outgroup taxon.

    The taxonomy is obtained from ``self._read_taxonomy_files``; every
    genome whose taxa contain ``options.outgroup_taxon`` is collected and
    passed as the outgroup to ``RerootTree.root_with_outgroup``.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user. Reads ``input_tree``,
        ``output_tree`` and ``outgroup_taxon`` (plus whatever
        ``_read_taxonomy_files`` reads).
    """
    check_file_exists(options.input_tree)

    genome_taxa = self._read_taxonomy_files(options)

    self.logger.info(f'Identifying genomes from the specified outgroup: {options.outgroup_taxon}')
    outgroup_ids = {
        gid
        for gid, lineage in genome_taxa.items()
        if options.outgroup_taxon in lineage
    }

    RerootTree().root_with_outgroup(options.input_tree,
                                    options.output_tree,
                                    outgroup_ids)

    self.logger.info('Done.')
def identify(self, options):
    """Identify marker genes in genomes.

    Validates the optional genome directory and batch file, ensures the
    output directory exists, resolves the genomes to process and hands
    them to ``Markers.identify``.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user. Reads ``genome_dir``,
        ``batchfile``, ``extension``, ``out_dir``, ``cpus``, ``prefix``,
        ``force`` and ``write_single_copy_genes``.
    """
    # Either input source may be unset; only validate the ones supplied.
    if options.genome_dir:
        check_dir_exists(options.genome_dir)

    if options.batchfile:
        check_file_exists(options.batchfile)

    make_sure_path_exists(options.out_dir)

    genome_files, translation_tables = self._genomes_to_process(
        options.genome_dir,
        options.batchfile,
        options.extension)

    # Kept on the instance as well as being used below.
    self.genomes_to_process = genome_files

    Markers(options.cpus).identify(genome_files,
                                   translation_tables,
                                   options.out_dir,
                                   options.prefix,
                                   options.force,
                                   options.write_single_copy_genes)

    self.logger.info('Done.')
def infer_ranks(self, options):
    """Establish taxonomic ranks of internal nodes using RED.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user. Reads ``input_tree``,
        ``ingroup_taxon`` and ``output_tree``.
    """
    check_file_exists(options.input_tree)

    InferRanks().run(options.input_tree,
                     options.ingroup_taxon,
                     options.output_tree)

    self.logger.info('Done.')
def decorate(self, options):
    """Decorate tree with GTDB taxonomy.

    Runs ``Decorate`` on the input tree with the merged taxonomy and,
    when ``options`` carries a ``suffix`` attribute, symlinks the
    decorated tree and its ``-table`` companion into ``options.out_dir``.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user. Reads ``input_tree`` and
        ``output_tree``; ``suffix``, ``prefix`` and ``out_dir`` are read
        only when ``suffix`` is present.

    Raises
    ------
    GenomeMarkerSetUnknown
        If ``options.suffix`` is present but is neither ``'bac120'`` nor
        ``'ar122'``.
    """
    check_file_exists(options.input_tree)

    merged_taxonomy = self._read_taxonomy_files(options)

    Decorate().run(options.input_tree, merged_taxonomy, options.output_tree)

    self.logger.info('Done.')

    # No 'suffix' means the command was run independently: nothing to link.
    if not hasattr(options, 'suffix'):
        return

    if options.suffix == 'bac120':
        decorated_tree = PATH_BAC120_DECORATED_TREE.format(prefix=options.prefix)
    elif options.suffix == 'ar122':
        decorated_tree = PATH_AR122_DECORATED_TREE.format(prefix=options.prefix)
    else:
        raise GenomeMarkerSetUnknown(
            'There was an error determining the marker set.')

    # Link the tree first, then its '-table' companion, under their base
    # names directly inside the output directory.
    for link_target in (decorated_tree, decorated_tree + '-table'):
        symlink_f(link_target,
                  os.path.join(options.out_dir,
                               os.path.basename(link_target)))
def infer(self, options):
    """Infer a tree from a user specified MSA.

    Checks the inputs and the required FastTree binary, derives the
    output paths under ``options.out_dir``, runs FastTree and, when
    invoked as the ``infer`` sub-command, symlinks the unrooted tree
    into the top level of the output directory.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user. Reads ``msa_file``,
        ``out_dir``, ``cpus``, ``prefix``, ``prot_model``,
        ``no_support`` and ``no_gamma``; ``suffix`` and
        ``subparser_name`` are optional attributes.
    """
    check_file_exists(options.msa_file)
    make_sure_path_exists(options.out_dir)

    # The multi-threaded binary is only required when more than one CPU
    # was requested.
    if options.cpus > 1:
        check_dependencies(['FastTreeMP'])
    else:
        check_dependencies(['FastTree'])

    # Paths relative to the output directory. Marker-specific templates
    # are used when a marker-set suffix is present on the options.
    if hasattr(options, 'suffix'):
        rel_tree = PATH_MARKER_UNROOTED_TREE.format(prefix=options.prefix,
                                                    marker=options.suffix)
        rel_tree_log = PATH_MARKER_TREE_LOG.format(prefix=options.prefix,
                                                   marker=options.suffix)
        rel_fasttree_log = PATH_MARKER_FASTTREE_LOG.format(prefix=options.prefix,
                                                           marker=options.suffix)
    else:
        rel_tree = PATH_UNROOTED_TREE.format(prefix=options.prefix)
        rel_tree_log = PATH_TREE_LOG.format(prefix=options.prefix)
        rel_fasttree_log = PATH_FASTTREE_LOG.format(prefix=options.prefix)

    output_tree = os.path.join(options.out_dir, rel_tree)
    tree_log = os.path.join(options.out_dir, rel_tree_log)
    fasttree_log = os.path.join(options.out_dir, rel_fasttree_log)

    fasttree = FastTree()
    fasttree.run(output_tree, tree_log, fasttree_log, options.prot_model,
                 options.no_support, options.no_gamma, options.msa_file,
                 options.cpus)
    self.logger.info(f'FastTree version: {fasttree.version}')

    if getattr(options, 'subparser_name', None) == 'infer':
        # Use the relative path directly as the link target. Slicing
        # ``output_tree`` by ``len(out_dir) + 1`` drops the wrong
        # characters when out_dir ends with a separator or is empty.
        symlink_f(rel_tree,
                  os.path.join(options.out_dir,
                               os.path.basename(output_tree)))

    self.logger.info('Done.')
def _read_taxonomy_files(self, options) -> Dict[str, Tuple[str, str, str, str, str, str, str]]:
    """Read and merge taxonomy files.

    Starts from the taxonomy in ``Config.TAXONOMY_FILE`` and then adds
    or overwrites entries from the optional GTDB-Tk classification file
    and the optional custom taxonomy file, in that order.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user. Reads
        ``gtdbtk_classification_file`` and ``custom_taxonomy_file``.

    Returns
    -------
    Dict[str, Tuple[str, str, str, str, str, str, str]]
        The merged taxonomy, keyed by genome identifier.

    Raises
    ------
    GTDBTkExit
        If the GTDB-Tk classification file and the custom taxonomy file
        both specify a taxonomy for the same genome.
    """
    self.logger.info('Reading GTDB taxonomy for representative genomes.')
    taxonomy = Taxonomy().read(Config.TAXONOMY_FILE)

    if options.gtdbtk_classification_file:
        # add and overwrite taxonomy for genomes specified in the
        # GTDB-Tk classification file
        check_file_exists(options.gtdbtk_classification_file)

        self.logger.info('Reading GTDB-Tk classification file.')
        gtdbtk_taxonomy = Taxonomy().read(options.gtdbtk_classification_file)

        # Presumably the file's header row parsed as a genome entry
        # (verify against Taxonomy.read). Dropped if present; a missing
        # key no longer aborts with a bare KeyError.
        gtdbtk_taxonomy.pop('user_genome', None)

        # Count overlaps before merging so the figure reflects entries
        # that already existed in the taxonomy.
        num_reassigned = sum(1 for gid in gtdbtk_taxonomy if gid in taxonomy)
        taxonomy.update(gtdbtk_taxonomy)

        self.logger.info(f'Read GTDB-Tk classifications for {len(gtdbtk_taxonomy):,} genomes.')
        self.logger.info(f'Reassigned taxonomy for {num_reassigned:,} GTDB representative genomes.')

    if options.custom_taxonomy_file:
        # add and overwrite taxonomy for genomes specified in the
        # custom taxonomy file
        check_file_exists(options.custom_taxonomy_file)

        self.logger.info('Reading custom taxonomy file.')
        custom_taxonomy = Taxonomy().read(options.custom_taxonomy_file)

        num_reassigned = sum(1 for gid in custom_taxonomy if gid in taxonomy)
        taxonomy.update(custom_taxonomy)

        self.logger.info(f'Read custom taxonomy for {len(custom_taxonomy):,} genomes.')
        self.logger.info(f'Reassigned taxonomy for {num_reassigned:,} GTDB representative genomes.')

    if options.gtdbtk_classification_file and options.custom_taxonomy_file:
        # The two user-supplied sources must be disjoint; otherwise it
        # is ambiguous which assignment should win.
        dup_genomes = set(gtdbtk_taxonomy).intersection(custom_taxonomy)
        if dup_genomes:
            self.logger.error('GTDB-Tk classification and custom taxonomy '
                              'files must not specify taxonomies for the '
                              'same genomes.')
            self.logger.error(f'These files have {len(dup_genomes):,} genomes in common.')
            self.logger.error(f'Example duplicate genome: {dup_genomes.pop()}')
            raise GTDBTkExit('Duplicated taxonomy information.')

    self.logger.info(f'Read taxonomy for {len(taxonomy):,} genomes.')

    return taxonomy
def convert_to_itol(self, options):
    """Convert Tree to iTOL format.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user. Reads ``input_tree`` and
        ``output_tree``.
    """
    check_file_exists(options.input_tree)

    Misc().convert_to_itol(options.input_tree, options.output_tree)

    self.logger.info('Done.')
def root(self, options):
    """Root tree using outgroup.

    Reads the custom taxonomy file when one is given, otherwise
    ``Config.TAXONOMY_FILE``, collects every genome whose taxa contain
    ``options.outgroup_taxon`` and re-roots the input tree on them.
    When ``options`` carries a ``suffix`` attribute, the rooted tree is
    also symlinked into ``options.out_dir`` under its base name.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user. Reads ``input_tree``,
        ``output_tree``, ``outgroup_taxon`` and
        ``custom_taxonomy_file``; ``suffix``, ``prefix`` and ``out_dir``
        are read only when ``suffix`` is present.

    Raises
    ------
    GenomeMarkerSetUnknown
        If ``options.suffix`` is present but is neither ``'bac120'`` nor
        ``'ar122'``.
    """
    self.logger.warning("Tree rooting is still under development!")

    check_file_exists(options.input_tree)

    if options.custom_taxonomy_file:
        check_file_exists(options.custom_taxonomy_file)
        taxonomy = Taxonomy().read(options.custom_taxonomy_file)
    else:
        taxonomy = Taxonomy().read(Config.TAXONOMY_FILE)

    self.logger.info('Identifying genomes from the specified outgroup.')
    outgroup = {genome_id
                for genome_id, taxa in taxonomy.items()
                if options.outgroup_taxon in taxa}

    reroot = RerootTree()
    reroot.root_with_outgroup(options.input_tree,
                              options.output_tree,
                              outgroup)

    # Symlink to the rooted tree file, if not run independently.
    if hasattr(options, 'suffix'):
        if options.suffix == 'bac120':
            rooted_tree = PATH_BAC120_ROOTED_TREE.format(prefix=options.prefix)
        elif options.suffix == 'ar122':
            rooted_tree = PATH_AR122_ROOTED_TREE.format(prefix=options.prefix)
        else:
            raise GenomeMarkerSetUnknown(
                'There was an error determining the marker set.')

        # The link name comes from the same path as the link target, so
        # the bac120 link is no longer named after the ar122 tree.
        symlink_f(rooted_tree,
                  os.path.join(options.out_dir,
                               os.path.basename(rooted_tree)))

    self.logger.info('Done.')
def decorate(self, options):
    """Decorate tree with GTDB taxonomy (placeholder).

    This version only verifies that the input tree exists and then logs
    a warning that decoration is not implemented; no output is written.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user. Reads ``input_tree``.
    """
    check_file_exists(options.input_tree)

    # Config.TAXONOMY_FILE is presumably the taxonomy source to use once
    # this is implemented; it is not read here.
    log = self.logger
    log.warning('DECORATE NOT YET IMPLEMENTED!')
    log.info('Done.')
def remove_labels(self, options):
    """Remove labels from tree.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user. Reads ``input_tree`` and
        ``output_tree``.
    """
    check_file_exists(options.input_tree)

    Misc().remove_labels(options.input_tree, options.output_tree)

    self.logger.info('Done.')