示例#1
0
文件: main.py 项目: gsc0107/RefineM
    def compatible(self, options):
        """Compatible command"""

        check_file_exists(options.reference_file)
        check_file_exists(options.scaffold_stats_file)
        make_sure_path_exists(options.output_dir)

        # read scaffold statistics and calculate genome stats
        self.logger.info('Reading scaffold statistics.')
        scaffold_stats = ScaffoldStats()
        scaffold_stats.read(options.scaffold_stats_file)

        genome_stats = GenomeStats()
        genome_stats = genome_stats.run(scaffold_stats)

        # identify putative homologs to reference genomes
        reference = Reference(1, None)
        putative_homologs = reference.homology_check(options.reference_file,
                                                     options.min_genes,
                                                     float(options.perc_genes))

        # identify scaffolds compatible with bins
        outliers = Outliers()
        output_file = os.path.join(options.output_dir, 'compatible.tsv')
        outliers.compatible(putative_homologs, scaffold_stats, genome_stats,
                            options.gc_perc, options.td_perc, options.cov_corr,
                            options.cov_perc, options.report_type, output_file)

        self.logger.info('Results written to: ' + output_file)
示例#2
0
    def reference(self, options):
        """Reference command"""
        self.logger.info('')
        self.logger.info('*******************************************************************************')
        self.logger.info('[RefineM - reference] Identifying scaffolds similar to specific genome(s).')
        self.logger.info('*******************************************************************************')

        check_file_exists(options.scaffold_prot_file)
        check_file_exists(options.scaffold_stats_file)
        make_sure_path_exists(options.output_dir)

        ref_gene_files = self._genome_files(options.ref_genome_prot_dir, options.protein_ext)
        if not self._check_protein_seqs(ref_gene_files):
            self.logger.warning('[Warning] All files must contain amino acid sequences.')
            sys.exit()

        reference = Reference(options.cpus, options.output_dir)
        reference_out = reference.run(options.scaffold_prot_file,
                                        options.scaffold_stats_file,
                                        ref_gene_files,
                                        options.db_file,
                                        options.evalue,
                                        options.per_identity)

        self.logger.info('')
        self.logger.info('  Results written to: ' + reference_out)

        self.time_keeper.print_time_stamp()
示例#3
0
文件: main.py 项目: gsc0107/RefineM
    def reference(self, options):
        """Reference command"""

        check_file_exists(options.scaffold_prot_file)
        check_file_exists(options.scaffold_stats_file)
        make_sure_path_exists(options.output_dir)

        ref_gene_files = self._genome_files(options.ref_genome_prot_dir,
                                            options.protein_ext)
        if not self._check_protein_seqs(ref_gene_files):
            self.logger.warning('All files must contain amino acid sequences.')
            sys.exit()

        reference = Reference(options.cpus, options.output_dir)
        reference_out = reference.run(options.scaffold_prot_file,
                                      options.scaffold_stats_file,
                                      ref_gene_files, options.db_file,
                                      options.evalue, options.per_identity,
                                      options.per_aln_len)

        self.logger.info('Results written to: ' + reference_out)
示例#4
0
    def compatible(self, options):
        """Compatible command"""
        self.logger.info('')
        self.logger.info('*******************************************************************************')
        self.logger.info('[RefineM - compatible] Identify scaffolds with compatible genomic statistics.')
        self.logger.info('*******************************************************************************')

        check_file_exists(options.reference_file)
        check_file_exists(options.scaffold_stats_file)
        make_sure_path_exists(options.output_dir)

        # read scaffold statistics and calculate genome stats
        self.logger.info('')
        self.logger.info('  Reading scaffold statistics.')
        scaffold_stats = ScaffoldStats()
        scaffold_stats.read(options.scaffold_stats_file)

        genome_stats = GenomeStats()
        genome_stats = genome_stats.run(scaffold_stats)

        # identify putative homologs to reference genomes
        reference = Reference(1, None)
        putative_homologs = reference.homology_check(options.reference_file,
                                                         options.min_genes,
                                                         float(options.perc_genes))

        # identify scaffolds compatible with bins
        outliers = Outliers()
        output_file = os.path.join(options.output_dir, 'compatible.tsv')
        outliers.compatible(putative_homologs, scaffold_stats, genome_stats,
                                      options.gc_perc, options.td_perc,
                                      options.cov_corr, options.cov_perc,
                                      options.report_type, output_file)

        self.logger.info('')
        self.logger.info('  Results written to: ' + output_file)

        self.time_keeper.print_time_stamp()