def compatible(self, options):
    """Run the 'compatible' command.

    Reads scaffold statistics, derives genome statistics from them, runs a
    homology check against the reference file, and asks Outliers to report
    compatible scaffolds in ``<output_dir>/compatible.tsv``.

    Attributes read from ``options``: reference_file, scaffold_stats_file,
    output_dir, min_genes, perc_genes (converted to float), gc_perc,
    td_perc, cov_corr, cov_perc and report_type.
    """
    # validate inputs and prepare the output directory before any work
    check_file_exists(options.reference_file)
    check_file_exists(options.scaffold_stats_file)
    make_sure_path_exists(options.output_dir)

    # load per-scaffold statistics, then derive the genome-level statistics
    self.logger.info('Reading scaffold statistics.')
    stats = ScaffoldStats()
    stats.read(options.scaffold_stats_file)
    bin_stats = GenomeStats().run(stats)

    # putative homologs to the reference genomes
    # (Reference is built with a fixed first argument of 1 and no output
    # directory, exactly as the command has always done)
    homologs = Reference(1, None).homology_check(
        options.reference_file,
        options.min_genes,
        float(options.perc_genes),
    )

    # scaffolds compatible with bins are written to a fixed file name
    detector = Outliers()
    results_path = os.path.join(options.output_dir, 'compatible.tsv')
    detector.compatible(
        homologs,
        stats,
        bin_stats,
        options.gc_perc,
        options.td_perc,
        options.cov_corr,
        options.cov_perc,
        options.report_type,
        results_path,
    )

    self.logger.info('Results written to: ' + results_path)
def reference(self, options):
    """Run the 'reference' command.

    Logs a banner, validates the inputs, collects the reference genome
    protein files and hands them to ``Reference.run``. The path returned
    by that call is logged, followed by a time stamp.

    Attributes read from ``options``: scaffold_prot_file,
    scaffold_stats_file, output_dir, ref_genome_prot_dir, protein_ext,
    cpus, db_file, evalue and per_identity.

    Raises:
        SystemExit: with a non-zero status when ``_check_protein_seqs``
            rejects the reference gene files.
    """
    self.logger.info('')
    self.logger.info('*******************************************************************************')
    self.logger.info('[RefineM - reference] Identifying scaffolds similar to specific genome(s).')
    self.logger.info('*******************************************************************************')

    # validate inputs and prepare the output directory before any work
    check_file_exists(options.scaffold_prot_file)
    check_file_exists(options.scaffold_stats_file)
    make_sure_path_exists(options.output_dir)

    ref_gene_files = self._genome_files(options.ref_genome_prot_dir,
                                        options.protein_ext)
    if not self._check_protein_seqs(ref_gene_files):
        self.logger.warning('[Warning] All files must contain amino acid sequences.')
        # a bare sys.exit() reports status 0 (success); this is a failure
        # path, so signal it to the calling shell or pipeline
        sys.exit(1)

    reference = Reference(options.cpus, options.output_dir)
    reference_out = reference.run(options.scaffold_prot_file,
                                  options.scaffold_stats_file,
                                  ref_gene_files,
                                  options.db_file,
                                  options.evalue,
                                  options.per_identity)

    self.logger.info('')
    self.logger.info(' Results written to: ' + reference_out)

    self.time_keeper.print_time_stamp()
def reference(self, options):
    """Run the 'reference' command.

    Validates the inputs, collects the reference genome protein files and
    hands them to ``Reference.run``. The path returned by that call is
    logged.

    Attributes read from ``options``: scaffold_prot_file,
    scaffold_stats_file, output_dir, ref_genome_prot_dir, protein_ext,
    cpus, db_file, evalue, per_identity and per_aln_len.

    Raises:
        SystemExit: with a non-zero status when ``_check_protein_seqs``
            rejects the reference gene files.
    """
    # validate inputs and prepare the output directory before any work
    check_file_exists(options.scaffold_prot_file)
    check_file_exists(options.scaffold_stats_file)
    make_sure_path_exists(options.output_dir)

    ref_gene_files = self._genome_files(options.ref_genome_prot_dir,
                                        options.protein_ext)
    if not self._check_protein_seqs(ref_gene_files):
        self.logger.warning('All files must contain amino acid sequences.')
        # a bare sys.exit() reports status 0 (success); this is a failure
        # path, so signal it to the calling shell or pipeline
        sys.exit(1)

    reference = Reference(options.cpus, options.output_dir)
    reference_out = reference.run(options.scaffold_prot_file,
                                  options.scaffold_stats_file,
                                  ref_gene_files,
                                  options.db_file,
                                  options.evalue,
                                  options.per_identity,
                                  options.per_aln_len)

    self.logger.info('Results written to: ' + reference_out)
def compatible(self, options):
    """Run the 'compatible' command.

    Logs a banner, reads scaffold statistics, derives genome statistics
    from them, runs a homology check against the reference file, and asks
    Outliers to report compatible scaffolds in
    ``<output_dir>/compatible.tsv``. A time stamp is printed at the end.

    Attributes read from ``options``: reference_file, scaffold_stats_file,
    output_dir, min_genes, perc_genes (converted to float), gc_perc,
    td_perc, cov_corr, cov_perc and report_type.
    """
    rule = '*******************************************************************************'
    banner = (
        '',
        rule,
        '[RefineM - compatible] Identify scaffolds with compatible genomic statistics.',
        rule,
    )
    for banner_line in banner:
        self.logger.info(banner_line)

    # validate inputs and prepare the output directory before any work
    check_file_exists(options.reference_file)
    check_file_exists(options.scaffold_stats_file)
    make_sure_path_exists(options.output_dir)

    # load per-scaffold statistics, then derive the genome-level statistics
    self.logger.info('')
    self.logger.info(' Reading scaffold statistics.')
    stats = ScaffoldStats()
    stats.read(options.scaffold_stats_file)
    bin_stats = GenomeStats().run(stats)

    # putative homologs to the reference genomes
    # (Reference is built with a fixed first argument of 1 and no output
    # directory, exactly as the command has always done)
    homologs = Reference(1, None).homology_check(
        options.reference_file,
        options.min_genes,
        float(options.perc_genes),
    )

    # scaffolds compatible with bins are written to a fixed file name
    detector = Outliers()
    results_path = os.path.join(options.output_dir, 'compatible.tsv')
    detector.compatible(
        homologs,
        stats,
        bin_stats,
        options.gc_perc,
        options.td_perc,
        options.cov_corr,
        options.cov_perc,
        options.report_type,
        results_path,
    )

    self.logger.info('')
    self.logger.info(' Results written to: ' + results_path)

    self.time_keeper.print_time_stamp()