Exemplo n.º 1
0
class TestReport(MacsyTest):
    """
    Test case whose fixture prepares a scratch output directory, a Config,
    a ModelLocation, a ProfileFactory and the sequence indexes.
    """

    def setUp(self):
        """
        Create a fresh output directory under the system temporary directory,
        copy the test sequence file into it and build the objects shared by the tests.
        """
        tmp_root = tempfile.gettempdir()
        scratch_dir = os.path.join(tmp_root, 'test_macsyfinder_Report')
        namespace = argparse.Namespace(
            db_type='gembase',
            models_dir=self.find_data('models'),
            res_search_dir=tmp_root,
            log_level=30,
            out_dir=scratch_dir,
        )

        # always start from an empty directory, even if a previous run left one behind
        if os.path.exists(scratch_dir):
            shutil.rmtree(scratch_dir)
        os.mkdir(scratch_dir)

        # work on a copy of the sequence file located inside the output directory
        fasta_src = self.find_data("base", "test_base.fa")
        shutil.copy(fasta_src, scratch_dir)
        namespace.sequence_db = os.path.join(scratch_dir,
                                             os.path.basename(fasta_src))
        self.cfg = Config(MacsyDefaults(), namespace)

        hmmer_path = os.path.join(self.cfg.out_dir(), self.cfg.hmmer_dir())
        os.mkdir(hmmer_path)

        self.model_name = 'foo'
        model_path = os.path.join(namespace.models_dir, self.model_name)
        self.model_location = ModelLocation(path=model_path)

        # The ProfileFactory must be re-created for each test:
        # it behaves like a singleton, so other tests would otherwise be
        # influenced by a previous ProfileFactory and its configuration
        # (for instance search_genes would get a profile without hmmer_exe).
        self.profile_factory = ProfileFactory(self.cfg)

        Indexes(self.cfg).build()

    def tearDown(self):
        """
        Remove the working directory; this cleanup is best effort,
        any error raised while doing it is ignored.
        """
        try:
            leftover = self.cfg.working_dir()
            shutil.rmtree(leftover)
        except Exception:
            pass
Exemplo n.º 2
0
def main(args=None, log_level=None) -> None:
    """
    main entry point to macsyprofile

    Check the previous run directory, locate the hmmer outputs and the
    profiles of the model family, parse every hmmer output and write
    the collected hits in a report file.

    :param args: the arguments passed on the command line without the program name
                 (``sys.argv[1:]`` is used when it is None)
    :type args: List of string
    :param log_level: the output verbosity
                      (derived from the parsed verbosity option when it is None)
    :type log_level: a positive int or a string among 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
    :raises FileNotFoundError: if the previous run path does not exist
    :raises ValueError: if the previous run path is not a directory,
                        if the directory of the --out file does not exist,
                        if the report file already exists and --force is not set,
                        or if the models used by the previous run cannot be located
    """
    global _log
    args = sys.argv[1:] if args is None else args
    parsed_args = parse_args(args)

    if log_level is None:
        log_level = verbosity_to_log_level(parsed_args.verbosity)
    _log = init_logger(log_level, out=(not parsed_args.mute))

    # For every fatal error below the explanation is carried by the critical
    # log message; sys.tracebacklimit = 0 hides the traceback of the bare
    # exception which is raised right after.
    if not os.path.exists(parsed_args.previous_run):
        _log.critical(f"{parsed_args.previous_run}: No such directory.")
        sys.tracebacklimit = 0
        raise FileNotFoundError() from None
    elif not os.path.isdir(parsed_args.previous_run):
        _log.critical(f"{parsed_args.previous_run} is not a directory.")
        sys.tracebacklimit = 0
        raise ValueError() from None

    # NOTE(review): these defaults look chosen to switch off the i-evalue and
    # profile coverage filtering so that every hit is reported - confirm.
    defaults = MacsyDefaults(i_evalue_sel=1.0e9, coverage_profile=-1.0)
    cfg = Config(defaults, parsed_args)

    msf_run_path = cfg.previous_run()
    hmmer_results = os.path.join(msf_run_path, cfg.hmmer_dir())
    hmm_suffix = cfg.res_search_suffix()
    profile_suffix = cfg.profile_suffix()
    if parsed_args.out:
        profile_report_path = os.path.normpath(parsed_args.out)
        dirname = os.path.normpath(os.path.dirname(parsed_args.out))
        # NOTE(review): only the existence of the directory is tested here,
        # although the message speaks about write permission.
        if not os.path.exists(dirname):
            _log.critical(f"The {dirname} directory is not writable")
            sys.tracebacklimit = 0
            raise ValueError() from None
    else:
        profile_report_path = os.path.join(cfg.previous_run(),
                                           'hmm_coverage.tsv')

    # never overwrite an existing report unless it is explicitly requested
    if os.path.exists(profile_report_path) and not parsed_args.force:
        _log.critical(
            f"The file {profile_report_path} already exists. "
            "Remove it or specify a new output name --out or use --force option"
        )
        sys.tracebacklimit = 0
        raise ValueError() from None

    hmmer_files = sorted(
        glob.glob(
            os.path.join(hmmer_results, f"{parsed_args.pattern}{hmm_suffix}")))
    try:
        # IndexError is raised either when no model is recorded in the
        # configuration, or when none of the candidate directories exists
        model_family_name = cfg.models()[0]
        candidates = [os.path.join(root, model_family_name)
                      for root in cfg.models_dir()]
        # when several candidates exist, the last one is used
        model_dir = [path for path in candidates if os.path.exists(path)][-1]
    except IndexError:
        _log.critical(
            f"Cannot find models in conf file {msf_run_path}. "
            "May be these results have been generated with an old version of macsyfinder."
        )
        sys.tracebacklimit = 0
        raise ValueError() from None
    profiles_dir = os.path.join(model_dir, 'profiles')

    _log.debug(f"hmmer_files: {hmmer_files}")
    all_hits = []
    with open(profile_report_path, 'w') as prof_out:
        # the header is written even when no hit is found
        print(header(args), file=prof_out)
        for hmmer_out_path in hmmer_files:
            _log.info(f"parsing {hmmer_out_path}")
            gene_name = get_gene_name(hmmer_out_path, hmm_suffix)
            profile_path = os.path.join(profiles_dir,
                                        f"{gene_name}{profile_suffix}")
            gene_profile_len = get_profile_len(profile_path)
            hmm = HmmProfile(gene_name, gene_profile_len, hmmer_out_path, cfg)
            all_hits += hmm.parse()

        if all_hits:
            if parsed_args.best_hits:
                # get_best_hits is applied replicon by replicon;
                # the hits kept for each replicon are ordered by position
                # before the global sort below (this intermediate ordering
                # is preserved from the original implementation).
                hits_by_replicon = {}
                for hit in all_hits:
                    hits_by_replicon.setdefault(hit.replicon_name, []).append(hit)
                all_hits = []
                for replicon_hits in hits_by_replicon.values():
                    best_hits = get_best_hits(replicon_hits,
                                              key=parsed_args.best_hits)
                    all_hits += sorted(best_hits, key=lambda h: h.position)

            all_hits.sort(
                key=lambda h:
                (h.gene_name, h.replicon_name, h.position, h.score))
            _log.info(f"found {len(all_hits)} hits")
            for hit in all_hits:
                print(hit, file=prof_out)
            _log.info(f"result is in '{profile_report_path}'")
        else:
            _log.info("No hit found")