Пример #1
0
    def test_build_my_indexes(self):
        """
        ``_build_my_indexes`` must raise a MacsypyError on a corrupted dataset.

        The sequence file is copied into a freshly created temporary
        directory before the index construction is attempted on it.
        """
        work_dir = os.path.join(tempfile.gettempdir(),
                                'test_macsyfinder_indexes')
        options = argparse.Namespace()
        options.db_type = 'gembase'
        options.out_dir = work_dir

        # always start from an empty working directory
        if os.path.exists(work_dir):
            shutil.rmtree(work_dir)
        os.makedirs(work_dir)

        fasta_src = self.find_data("base", "test_base_with_errors.fa")
        shutil.copy(fasta_src, work_dir)
        options.sequence_db = os.path.join(work_dir,
                                           os.path.basename(fasta_src))
        self.cfg = Config(MacsyDefaults(), options)

        indexes = Indexes(self.cfg)
        with self.assertRaises(MacsypyError) as raised:
            # the directory for the index exists and is writable but
            # the sequence file is corrupted and cannot be read correctly
            with self.catch_log():
                indexes._build_my_indexes(work_dir)
        message = str(raised.exception)
        self.assertTrue(
            message.startswith("unable to index the sequence dataset:"))
Пример #2
0
    def test_fill_ordered_replicon_min_max(self):
        """
        ``_fill_ordered_min_max`` must register exactly one replicon.

        The configuration is rebuilt for the ``ordered_replicon`` dataset, the
        index is built on a copy of the dataset placed in the working
        directory, and the resulting replicon is expected to carry the
        configured topology and to span positions 1 to 52.
        """
        # local import: only needed to locate the platform temporary directory
        import tempfile

        self.tearDown()
        self.cfg = Config(hmmer_exe="hmmsearch",
                          sequence_db=os.path.join(self._data_dir, "base",
                                                   "ordered_replicon_base"),
                          db_type="ordered_replicon",
                          e_value_res=1,
                          i_evalue_sel=0.5,
                          def_dir=os.path.join(self._data_dir, 'DEF'),
                          # portable replacement for the hard-coded '/tmp'
                          res_search_dir=tempfile.gettempdir(),
                          res_search_suffix=".search_hmm.out",
                          profile_dir=os.path.join(self._data_dir, 'profiles'),
                          profile_suffix=".hmm",
                          res_extract_suffix="",
                          log_level=30,
                          # portable replacement for the hard-coded '/dev/null'
                          log_file=os.devnull)

        # work on a copy so that the index is not written next to the reference data
        shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
        self.cfg.options['sequence_db'] = os.path.join(
            self.cfg.working_dir, os.path.basename(self.cfg.sequence_db))

        idx = Indexes(self.cfg)
        idx._build_my_indexes()
        # bypass the real constructor so that the filling method can be tested alone
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(self.cfg)
        db._fill_ordered_min_max(self.cfg.replicon_topology)

        self.assertEqual(len(db._DB), 1)
        rep = db[RepliconDB.ordered_replicon_name]
        self.assertEqual(rep.topology, self.cfg.replicon_topology)
        self.assertEqual(rep.min, 1)
        self.assertEqual(rep.max, 52)
Пример #3
0
    def setUp(self):
        """
        Build the fixture: a clean output directory holding a copy of the
        test dataset, the configuration, the model location, a profile
        factory and the sequence index.
        """
        tmp_root = tempfile.gettempdir()
        options = argparse.Namespace(
            db_type='gembase',
            models_dir=self.find_data('models'),
            res_search_dir=tmp_root,
            log_level=30,
            out_dir=os.path.join(tmp_root, 'test_macsyfinder_Report'),
        )
        # remove what a previous run may have left behind
        if os.path.exists(options.out_dir):
            shutil.rmtree(options.out_dir)
        os.mkdir(options.out_dir)

        fasta_src = self.find_data("base", "test_base.fa")
        shutil.copy(fasta_src, options.out_dir)
        options.sequence_db = os.path.join(options.out_dir,
                                           os.path.basename(fasta_src))
        self.cfg = Config(MacsyDefaults(), options)

        hmmer_dir = os.path.join(self.cfg.out_dir(), self.cfg.hmmer_dir())
        os.mkdir(hmmer_dir)

        self.model_name = 'foo'
        model_path = os.path.join(options.models_dir, self.model_name)
        self.model_location = ModelLocation(path=model_path)

        # the ProfileFactory behaves like a singleton, so it is recreated
        # for every test: otherwise its configuration leaks into other tests
        # (for instance search_genes would get profiles without hmmer_exe)
        self.profile_factory = ProfileFactory(self.cfg)

        Indexes(self.cfg).build()
Пример #4
0
    def parse(self) -> List[LightHit]:
        """
        Parse the raw hmmer output file and build the hits it describes.

        The file is split with ``groupby`` on ``self._hit_start``; the first
        group (the summary) is discarded, then groups are consumed two by
        two: a header giving the hit identifier, followed by the body that is
        handed to ``_parse_hmm_body`` together with the profile length, the
        sequence length, the coverage threshold and the i-evalue threshold.

        :return: the extracted hits, sorted by decreasing score
        """
        index_path = Indexes(self.cfg).build()
        seq_info = self._build_my_db(self._hmmer_raw_out)
        self._fill_my_db(index_path, seq_info)

        collected = []
        with open(self._hmmer_raw_out, 'r') as raw_out:
            evalue_cutoff = self.cfg.i_evalue_sel()
            min_coverage = self.cfg.coverage_profile()
            groups = (group for _, group in groupby(raw_out, self._hit_start))
            # the first group is the summary of the file: skip it
            next(groups)
            for header in groups:
                hit_id = self._parse_hmm_header(header)
                seq_lg, position_hit = seq_info[hit_id]
                replicon_name = self._get_replicon_name(hit_id)

                # the group right after a header is the body of the same hit
                body = next(groups)
                collected.extend(
                    self._parse_hmm_body(hit_id, self.gene_profile_lg,
                                         seq_lg, min_coverage,
                                         replicon_name, position_hit,
                                         evalue_cutoff, body))
            hits = sorted(collected, key=lambda hit: -hit.score)
        return hits
Пример #5
0
 def test_find_my_indexes(self):
     """
     ``find_my_indexes`` must return None while no ``.idx`` file exists
     and the path of that file once it has been created.
     """
     idx = Indexes(self.cfg)
     self.assertIsNone(idx.find_my_indexes())
     new_idx = os.path.join(os.path.dirname(self.cfg.sequence_db),
                            idx.name + ".idx")
     # create an empty index file; the context manager closes the handle
     # right away instead of leaking it until garbage collection
     with open(new_idx, 'w'):
         pass
     self.assertEqual(idx.find_my_indexes(), new_idx)
Пример #6
0
 def test_build_no_idx(self):
     """``build`` must return the path of the ``.idx`` file located next to the dataset."""
     indexes = Indexes(self.cfg)
     built_path = indexes.build()
     dataset_dir = os.path.dirname(self.cfg.sequence_db())
     expected_path = os.path.join(dataset_dir, indexes.name + ".idx")
     self.assertEqual(built_path, expected_path)
Пример #7
0
    def setUp(self):
        """
        Build the fixture: a clean job directory, a gembase configuration on
        the test dataset, the model location, the sequence index and a
        profile factory.
        """
        self.tmp_dir = os.path.join(tempfile.gettempdir(),
                                    'test_macsyfinder_search_genes')
        # wipe the leftovers of a previous run before recreating the directory
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        os.mkdir(self.tmp_dir)

        options = argparse.Namespace()
        options.sequence_db = self.find_data("base", "test_base.fa")
        options.db_type = 'gembase'
        options.models_dir = self.find_data('models')
        options.log_level = 30
        job_dir = os.path.join(self.tmp_dir, 'job_1')
        options.out_dir = job_dir
        options.res_search_dir = job_dir
        os.mkdir(job_dir)

        self.cfg = Config(MacsyDefaults(), options)

        self.model_name = 'foo'
        model_path = os.path.join(options.models_dir, self.model_name)
        self.model_location = ModelLocation(path=model_path)

        Indexes(self.cfg)._build_my_indexes()
        self.profile_factory = ProfileFactory(self.cfg)
Пример #8
0
    def test_fill_my_db(self):
        """
        ``_fill_my_db`` must replace every entry of the dictionary built by
        ``_build_my_db`` with a 2-tuple taken from the sequence index.
        """
        options = argparse.Namespace()
        options.db_type = 'gembase'
        options.models_dir = self.find_data('models')
        options.log_level = 30
        options.sequence_db = self.find_data("base", "test_base.fa")
        options.index_dir = self.tmpdir
        cfg = Config(MacsyDefaults(), options)
        hmmer_out = self.find_data('hmm', 'gspD.search_hmm.out')

        index_path = Indexes(cfg).build()
        profile = macsyprofile.HmmProfile("gspD", 596, hmmer_out, cfg)

        hits_db = profile._build_my_db(hmmer_out)
        profile._fill_my_db(index_path, hits_db)

        expected = {
            'PSAE001c01_031420': (658, 73),
            'PSAE001c01_051090': (714, 75),
            'PSAE001c01_018920': (776, 71),
            'PSAE001c01_043580': (416, 74),
            'PSAE001c01_017350': (600, 70),
            'PSAE001c01_013980': (759, 69),
            'PSAE001c01_026600': (273, 72),
            'NC_xxxxx_xx_056141': (803, 141),
            'PSAE001c01_006940': (803, 68),
        }
        self.assertDictEqual(hits_db, expected)
Пример #9
0
    def setUp(self):
        """
        Reset the logging registry, attach a handler writing to the null
        device to the macsypy logger, then build the configuration, the
        profile registry and the sequence index on a copy of the dataset.
        """
        root_logger = logging.getLogger()
        root_logger.manager.loggerDict.clear()

        # add only one handler to the macsypy logger
        from macsypy.gene import _log
        package_logger = _log.parent
        if platform.system() == 'Windows':
            null_device = 'NUL'
        else:
            null_device = '/dev/null'
        package_logger.addHandler(logging.FileHandler(null_device))

        data_dir = self._data_dir
        self.cfg = Config(
            hmmer_exe="hmmsearch",
            sequence_db=os.path.join(data_dir, "base", "test_base.fa"),
            db_type="gembase",
            e_value_res=1,
            i_evalue_sel=0.5,
            def_dir=os.path.join(data_dir, 'DEF'),
            res_search_dir=tempfile.gettempdir(),
            res_search_suffix=".search_hmm.out",
            profile_dir=os.path.join(data_dir, 'profiles'),
            profile_suffix=".hmm",
            res_extract_suffix="",
            log_level=30,
            log_file=null_device,
        )
        # the index is built on a copy of the dataset located in the working directory
        shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
        copied_db = os.path.join(self.cfg.working_dir,
                                 os.path.basename(self.cfg.sequence_db))
        self.cfg.options['sequence_db'] = copied_db
        self.profile_registry = ProfilesRegistry(self.cfg)

        Indexes(self.cfg)._build_my_indexes()
Пример #10
0
    def test_fill_ordered_replicon_min_max(self):
        """
        ``_fill_ordered_min_max`` must register exactly one replicon carrying
        the configured topology and spanning positions 1 to 52.
        """
        self.tearDown()
        data_dir = self._data_dir
        if platform.system() == 'Windows':
            null_device = 'NUL'
        else:
            null_device = '/dev/null'
        self.cfg = Config(
            hmmer_exe="hmmsearch",
            sequence_db=os.path.join(data_dir, "base", "ordered_replicon_base"),
            db_type="ordered_replicon",
            e_value_res=1,
            i_evalue_sel=0.5,
            def_dir=os.path.join(data_dir, 'DEF'),
            res_search_dir=tempfile.gettempdir(),
            res_search_suffix=".search_hmm.out",
            profile_dir=os.path.join(data_dir, 'profiles'),
            profile_suffix=".hmm",
            res_extract_suffix="",
            log_level=30,
            log_file=null_device,
        )

        # the index is built on a copy of the dataset located in the working directory
        shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
        copied_db = os.path.join(self.cfg.working_dir,
                                 os.path.basename(self.cfg.sequence_db))
        self.cfg.options['sequence_db'] = copied_db

        Indexes(self.cfg)._build_my_indexes()
        # bypass the real constructor so that the filling method can be tested alone
        RepliconDB.__init__ = self.fake_init
        replicon_db = RepliconDB(self.cfg)
        replicon_db._fill_ordered_min_max(self.cfg.replicon_topology)

        self.assertEqual(len(replicon_db._DB), 1)
        replicon = replicon_db[RepliconDB.ordered_replicon_name]
        self.assertEqual(replicon.topology, self.cfg.replicon_topology)
        self.assertEqual(replicon.min, 1)
        self.assertEqual(replicon.max, 52)
Пример #11
0
    def setUp(self):
        """
        Reset the logging registry, attach a handler writing to the null
        device to the macsypy logger, then build the configuration, the
        profile registry and the sequence index on a copy of the dataset.
        """
        logging.getLogger().manager.loggerDict.clear()

        # add only one handler to the macsypy logger
        from macsypy.gene import _log

        on_windows = platform.system() == "Windows"
        sink = "NUL" if on_windows else "/dev/null"
        sink_handler = logging.FileHandler(sink)
        _log.parent.addHandler(sink_handler)

        config_options = dict(
            hmmer_exe="hmmsearch",
            sequence_db=os.path.join(self._data_dir, "base", "test_base.fa"),
            db_type="gembase",
            e_value_res=1,
            i_evalue_sel=0.5,
            def_dir=os.path.join(self._data_dir, "DEF"),
            res_search_dir=tempfile.gettempdir(),
            res_search_suffix=".search_hmm.out",
            profile_dir=os.path.join(self._data_dir, "profiles"),
            profile_suffix=".hmm",
            res_extract_suffix="",
            log_level=30,
            log_file=sink,
        )
        self.cfg = Config(**config_options)

        # the index is built on a copy of the dataset located in the working directory
        shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
        db_name = os.path.basename(self.cfg.sequence_db)
        self.cfg.options["sequence_db"] = os.path.join(self.cfg.working_dir, db_name)
        self.profile_registry = ProfilesRegistry(self.cfg)

        indexes = Indexes(self.cfg)
        indexes._build_my_indexes()
Пример #12
0
    def test_iter(self):
        """
        Iterating over an index that is not built must raise a MacsypyError;
        once built, iteration must yield (sequence id, length, rank) tuples.
        """
        indexes = Indexes(self.cfg)
        with self.assertRaises(MacsypyError) as raised:
            next(iter(indexes))

        self.assertEqual(str(raised.exception), 'Build index before to use it.')

        indexes.build()
        # (identifier suffix, sequence length) in the order of the index;
        # the rank is the 1-based position in this sequence
        suffix_and_length = (
            ('01359', 200), ('01360', 484), ('01361', 406), ('01390', 326),
            ('01391', 54), ('01392', 206), ('01393', 477), ('01394', 126),
            ('01395', 405), ('01396', 572), ('01397', 721), ('01398', 467),
            ('01399', 720), ('01400', 559), ('01401', 153), ('01402', 4558),
            ('01500', 120), ('01501', 344), ('01502', 478), ('01503', 724),
            ('01504', 309), ('01505', 390), ('01506', 419), ('01540', 353),
            ('01541', 229), ('01542', 267), ('01543', 328), ('01544', 258),
            ('01545', 228), ('01546', 538), ('01547', 77), ('01548', 476),
            ('01549', 324), ('01550', 387), ('01551', 382), ('01552', 149),
            ('01553', 319), ('01554', 237), ('01555', 74), ('01556', 362),
            ('01557', 170), ('01558', 77), ('01559', 296), ('01560', 405),
            ('01561', 182), ('01562', 445), ('01563', 212), ('01564', 387),
            ('01565', 414),
        )
        expected_idx = [
            ('VICH001.B.00001.C001_' + suffix, length, rank)
            for rank, (suffix, length) in enumerate(suffix_and_length, 1)
        ]
        self.assertListEqual(list(iter(indexes)), expected_idx)
 def test_build_no_idx(self):
     """
     After ``build``, both the macsyfinder index and the five hmmer index
     files must be found next to the sequence dataset.
     """
     # fall back on formatdb only when makeblastdb is not available
     if not which('makeblastdb'):
         if which('formatdb'):
             self.cfg.options['index_db_exe'] = 'formatdb'
     indexes = Indexes(self.cfg)
     indexes.build()
     found_my_idx = indexes.find_my_indexes()
     found_hmmer_idx = indexes.find_hmmer_indexes()

     dataset_dir = os.path.dirname(self.cfg.sequence_db)
     self.assertEqual(found_my_idx,
                      os.path.join(dataset_dir, indexes.name + ".idx"))

     expected_hmmer_idx = []
     for suffix in ('.phr', '.pin', '.psd', '.psi', '.psq'):
         expected_hmmer_idx.append(self.cfg.sequence_db + suffix)
     self.assertEqual(found_hmmer_idx, expected_hmmer_idx)
Пример #14
0
 def test_find_hmmer_indexes_all_files(self):
     """
     When the five hmmer index files exist, ``find_hmmer_indexes`` must
     return their paths in the order of the suffixes.
     """
     idx = Indexes(self.cfg)
     suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
     files_2_find = []
     for s in suffixes:
         new_idx = self.cfg.sequence_db + s
         # create an empty file and close the handle immediately
         with open(new_idx, 'w'):
             pass
         files_2_find.append(new_idx)
     hmmer_idx = idx.find_hmmer_indexes()
     self.assertListEqual(hmmer_idx, files_2_find)
 def test_find_hmmer_indexes_all_files(self):
     """
     With one empty file per hmmer index suffix next to the dataset,
     ``find_hmmer_indexes`` must return exactly these files, in suffix order.
     """
     idx = Indexes(self.cfg)
     suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
     files_2_find = [self.cfg.sequence_db + s for s in suffixes]
     for new_idx in files_2_find:
         # the context manager closes the handle instead of leaking it
         with open(new_idx, 'w'):
             pass
     hmmer_idx = idx.find_hmmer_indexes()
     self.assertListEqual(hmmer_idx, files_2_find)
Пример #16
0
    def setUp(self):
        """
        Reset the logging registry, build the configuration on a copy of the
        test dataset, store the reference gene lists of the two replicons
        and build the sequence index.
        """
        root_logger = logging.getLogger()
        root_logger.manager.loggerDict.clear()

        data_dir = self._data_dir
        if platform.system() == 'Windows':
            null_device = 'NUL'
        else:
            null_device = '/dev/null'
        self.cfg = Config(
            hmmer_exe="hmmsearch",
            sequence_db=os.path.join(data_dir, "base", "test_base.fa"),
            db_type="gembase",
            e_value_res=1,
            i_evalue_sel=0.5,
            def_dir=os.path.join(data_dir, 'DEF'),
            res_search_dir=tempfile.gettempdir(),
            res_search_suffix=".search_hmm.out",
            profile_dir=os.path.join(data_dir, 'profiles'),
            profile_suffix=".hmm",
            res_extract_suffix="",
            log_level=30,
            log_file=null_device,
        )

        # the index is built on a copy of the dataset located in the working directory
        shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
        copied_db = os.path.join(self.cfg.working_dir,
                                 os.path.basename(self.cfg.sequence_db))
        self.cfg.options['sequence_db'] = copied_db

        # reference data: pairs of strings, one pair per gene
        # NOTE(review): the last pair of each list appears twice — confirm it is intended
        self.ESCO030p01_genes = [
            ('000010', '886'), ('000020', '291'), ('000030', '656'), ('000040', '500'), ('000050', '407'),
            ('000060', '144'), ('000070', '183'), ('000080', '121'), ('000090', '199'), ('000100', '325'),
            ('000110', '425'), ('000120', '171'), ('000130', '277'), ('000140', '133'), ('000150', '108'),
            ('000160', '295'), ('000170', '273'), ('000180', '367'), ('000190', '573'), ('000200', '343'),
            ('000210', '295'), ('000220', '108'), ('000230', '117'), ('000240', '153'), ('000250', '479'),
            ('000260', '706'), ('000270', '998'), ('000280', '171'), ('000290', '108'), ('000300', '295'),
            ('000310', '165'), ('000320', '243'), ('000330', '295'), ('000340', '108'), ('000350', '1755'),
            ('000360', '248'), ('000370', '286'), ('000380', '186'), ('000390', '83'), ('000400', '153'),
            ('000410', '69'), ('000420', '295'), ('000430', '108'), ('000440', '145'), ('000450', '59'),
            ('000460', '124'), ('000470', '246'), ('000480', '325'), ('000490', '54'), ('000500', '95'),
            ('000510', '83'), ('000520', '56'), ('000530', '401'), ('000540', '320'), ('000550', '256'),
            ('000560', '73'), ('000570', '144'), ('000580', '258'), ('000590', '133'), ('000600', '140'),
            ('000610', '63'), ('000620', '138'), ('000630', '68'), ('000640', '169'), ('000650', '127'),
            ('000660', '295'), ('000670', '108'), ('000670', '108'),
        ]

        self.PSAE001c01_genes = [
            ('006940', '803'), ('013980', '759'), ('017350', '600'), ('018920', '776'), ('026600', '273'),
            ('031420', '658'), ('043580', '416'), ('051090', '714'), ('055870', '449'), ('055880', '447'),
            ('055890', '588'), ('055900', '292'), ('055910', '262'), ('055920', '166'), ('055930', '288'),
            ('055940', '194'), ('055950', '567'), ('055960', '188'), ('055970', '247'), ('055980', '252'),
            ('055990', '455'), ('056000', '450'), ('056010', '260'), ('056020', '246'), ('056030', '70'),
            ('056040', '133'), ('056050', '284'), ('056060', '585'), ('056070', '435'), ('056080', '342'),
            ('056090', '252'), ('056100', '122'), ('056110', '213'), ('056120', '400'), ('056130', '134'),
            ('056140', '138'), ('056150', '397'), ('056160', '298'), ('056170', '186'), ('056180', '445'),
            ('056190', '414'), ('056200', '132'), ('056210', '674'), ('056220', '319'), ('056230', '394'),
            ('056240', '207'), ('056250', '401'), ('056260', '611'), ('056270', '257'), ('056280', '169'),
            ('056290', '454'), ('056300', '141'), ('056310', '458'), ('056320', '286'), ('056330', '514'),
            ('056340', '178'), ('056350', '156'), ('056360', '85'), ('056370', '289'), ('056380', '126'),
            ('056390', '290'), ('056400', '262'), ('056410', '214'), ('056420', '630'), ('056430', '127'),
            ('056440', '455'), ('056440', '455'),
        ]

        Indexes(self.cfg)._build_my_indexes()
Пример #17
0
    def _fill_my_db(self, macsyfinder_idx: str, db: Dict) -> None:
        """
        Complete *db* in place with the length and rank of the matched sequences.

        The index of the dataset is built from ``self.cfg`` and iterated;
        for every sequence identifier already present as a key of *db*, the
        value is replaced by the tuple ``(length, rank)``. Identifiers which
        are not keys of *db* are ignored.

        :param macsyfinder_idx: the path of the macsyfinder index corresponding to the dataset
                                (not read by this method, the index is rebuilt from ``self.cfg``)
        :type  macsyfinder_idx: string
        :param db: the database containing all sequence ids of the hits.
        :type db: dict
        """
        sequence_index = Indexes(self.cfg)
        sequence_index.build()
        for entry in sequence_index:
            seq_id, seq_length, seq_rank = entry
            if seq_id not in db:
                continue
            db[seq_id] = (seq_length, seq_rank)
Пример #18
0
 def test_build_not_writable(self):
     """
     ``build`` must raise an IOError naming the directory when the directory
     holding the sequence dataset is not writable.
     """
     # Skip test on Windows, since setting the folder permissions does not affect files inside.
     # In a Singularity container tests are run as root and this test makes no sense.
     # NOTE(review): no skip guard is visible in this block — presumably applied
     # by decorators outside of it; confirm.
     idx = Indexes(self.cfg)
     idx_dir = os.path.dirname(self.cfg.sequence_db())
     os.chmod(idx_dir, 0o000)
     try:
         with self.assertRaises(IOError) as ctx:
             with self.catch_log():
                 idx.build()
         # raw string: '\(' is a regex escape, not a Python string escape
         self.assertRegex(str(ctx.exception),
                          r"cannot build indexes, \(.+/test_macsyfinder_indexes\) is not writable")
     finally:
         # always restore the permissions so that the directory can be cleaned up
         os.chmod(idx_dir, 0o777)
 def test_build_with_idx(self):
     """
     When index files are already present, they must still be empty after
     ``build`` has been called.
     """
     if not which('makeblastdb') and which('formatdb'):
         self.cfg.options['index_db_exe'] = 'formatdb'
     # put fake (empty) hmmer indexes
     suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
     for s in suffixes:
         # the context manager closes the handle instead of leaking it
         with open(self.cfg.sequence_db + s, 'w'):
             pass
     idx = Indexes(self.cfg)
     # put a fake (empty) macsyfinder index as well
     fake_my_idx = os.path.join(os.path.dirname(self.cfg.sequence_db), idx.name + ".idx")
     with open(fake_my_idx, 'w'):
         pass
     idx.build()
     my_idx = idx.find_my_indexes()
     hmmer_idx = idx.find_hmmer_indexes()
     for f in hmmer_idx + [my_idx]:
         self.assertEqual(os.path.getsize(f), 0)
Пример #20
0
 def test_build_not_writable(self):
     """
     ``build`` must raise an IOError when the directory holding the
     sequence dataset is not writable.
     """
     # Skip test on Windows, since setting the folder permissions does not affect files inside.
     # In a Singularity container tests are run as root and this test makes no sense.
     # NOTE(review): no skip guard is visible in this block — presumably applied
     # by decorators outside of it; confirm.
     idx = Indexes(self.cfg)
     idx_dir = os.path.dirname(self.cfg.sequence_db)
     # '0o' octal literals are valid on Python 2.6+ and Python 3,
     # whereas the former '0777' spelling is a syntax error on Python 3
     os.chmod(idx_dir, 0o000)
     try:
         self.assertRaises(IOError, idx.build)
     finally:
         # always restore the permissions so that the directory can be cleaned up
         os.chmod(idx_dir, 0o777)
 def test_find_hmmer_indexes_all_files_and_virtual(self):
     """
     With two numbered files per hmmer index suffix plus one ``.pal`` file,
     ``find_hmmer_indexes`` must return all of them (compared sorted).
     """
     idx = Indexes(self.cfg)
     # test indexes + pal
     suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
     files_2_find = []
     for s in suffixes:
         for i in range(2):
             # NOTE(review): the suffix already starts with '.', so the names
             # contain '..' — kept as is, confirm it is intended
             new_idx = "%s.%d.%s" % (self.cfg.sequence_db, i, s)
             # create an empty file and close the handle immediately
             with open(new_idx, 'w'):
                 pass
             files_2_find.append(new_idx)
     new_idx = self.cfg.sequence_db + '.pal'
     with open(new_idx, 'w'):
         pass
     files_2_find.append(new_idx)
     files_2_find.sort()
     hmmer_idx = idx.find_hmmer_indexes()
     hmmer_idx.sort()
     self.assertListEqual(hmmer_idx, files_2_find)
Пример #22
0
 def test_find_hmmer_indexes_all_files_and_virtual(self):
     """
     With two numbered files per hmmer index suffix plus one ``.pal`` file,
     ``find_hmmer_indexes`` must return all of them (compared sorted).
     """
     idx = Indexes(self.cfg)
     # test indexes + pal
     suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
     files_2_find = []
     for s in suffixes:
         for i in range(2):
             # NOTE(review): the suffix already starts with '.', so the names
             # contain '..' — kept as is, confirm it is intended
             new_idx = "{0}.{1:d}.{2}".format(self.cfg.sequence_db, i, s)
             # create an empty file and close the handle immediately
             with open(new_idx, 'w'):
                 pass
             files_2_find.append(new_idx)
     new_idx = self.cfg.sequence_db + '.pal'
     with open(new_idx, 'w'):
         pass
     files_2_find.append(new_idx)
     files_2_find.sort()
     hmmer_idx = idx.find_hmmer_indexes()
     hmmer_idx.sort()
     self.assertListEqual(hmmer_idx, files_2_find)
Пример #23
0
 def test_find_hmmer_indexes_all_files_and_pal(self):
     """
     With one file per hmmer index suffix and a ``.pal`` file in addition,
     ``find_hmmer_indexes`` must raise a RuntimeError.
     """
     idx = Indexes(self.cfg)
     # test all the files + pal
     suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq', '.pal')
     for s in suffixes:
         # create an empty file and close the handle immediately
         with open(self.cfg.sequence_db + s, 'w'):
             pass
     self.assertRaises(RuntimeError, idx.find_hmmer_indexes)
Пример #24
0
 def test_find_hmmer_indexes_lack_pal(self):
     """
     With two numbered files per hmmer index suffix but no ``.pal`` file,
     ``find_hmmer_indexes`` must raise a RuntimeError.
     """
     idx = Indexes(self.cfg)
     # test several indexes without pal
     suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
     for s in suffixes:
         for i in range(2):
             # create an empty file and close the handle immediately
             with open(self.cfg.sequence_db + str(i) + s, 'w'):
                 pass
     self.assertRaises(RuntimeError, idx.find_hmmer_indexes)
Пример #25
0
    def test_fill_gembase_min_max_oredered_replicon(self):
        """
        ``_fill_gembase_min_max`` must raise a MacsypyError with an explicit
        message when it is run on the ordered replicon dataset.
        """
        fasta_src = self.find_data("base", "ordered_replicon_base.fasta")
        shutil.copy(fasta_src, self.args.out_dir)
        copied_db = os.path.join(self.args.out_dir,
                                 os.path.basename(fasta_src))
        self.args.sequence_db = copied_db
        cfg = Config(MacsyDefaults(), self.args)

        Indexes(cfg).build()
        # bypass the real constructor so that the filling method can be tested alone
        RepliconDB.__init__ = self.fake_init
        replicon_db = RepliconDB(cfg)
        with self.assertRaises(MacsypyError) as raised:
            with self.catch_log():
                # NOTE(review): the topology comes from self.cfg while everything
                # else uses the local cfg — confirm this is intended
                replicon_db._fill_gembase_min_max({}, self.cfg.replicon_topology())
        expected_msg = (f"Error during sequence-db '{self.args.sequence_db}' parsing. "
                        f"Are you sure db-type is 'gembase'?")
        self.assertEqual(str(raised.exception), expected_msg)
Пример #26
0
    def test_fill_ordered_replicon_min_max(self):
        """
        ``_fill_ordered_min_max`` must register exactly one replicon carrying
        the configured topology and spanning positions 1 to 52.
        """
        fasta_src = self.find_data("base", "ordered_replicon_base.fasta")
        shutil.copy(fasta_src, self.args.out_dir)
        copied_db = os.path.join(self.args.out_dir,
                                 os.path.basename(fasta_src))
        self.args.sequence_db = copied_db
        cfg = Config(MacsyDefaults(), self.args)

        Indexes(cfg).build()
        # bypass the real constructor so that the filling method can be tested alone
        RepliconDB.__init__ = self.fake_init
        replicon_db = RepliconDB(cfg)
        replicon_db._fill_ordered_min_max(cfg.replicon_topology())

        self.assertEqual(len(replicon_db._DB), 1)
        replicon = replicon_db[RepliconDB.ordered_replicon_name]
        self.assertEqual(replicon.topology, cfg.replicon_topology())
        self.assertEqual(replicon.min, 1)
        self.assertEqual(replicon.max, 52)
Пример #27
0
    def test_build_not_writable(self):
        """
        ``build`` must raise an IOError when the directory holding the
        sequence dataset is not writable (not checked on Windows).
        """
        idx = Indexes(self.cfg)
        idx_dir = os.path.dirname(self.cfg.sequence_db)

        # Skip test on Windows, since setting the folder permissions does not affect files inside
        if platform.system() != 'Windows':
            # '0o' octal literals are valid on Python 2.6+ and Python 3,
            # whereas the former '0777' spelling is a syntax error on Python 3
            os.chmod(idx_dir, 0o000)
            try:
                self.assertRaises(IOError, idx.build)
            finally:
                # always restore the permissions so that the directory can be cleaned up
                os.chmod(idx_dir, 0o777)
Пример #28
0
 def test_find_hmmer_indexes_some_files(self):
     """
     When only four of the hmmer index files exist (no ``.psq``),
     ``find_hmmer_indexes`` must raise a RuntimeError.
     """
     idx = Indexes(self.cfg)
     # test with some files missing
     suffixes = ('.phr', '.pin', '.psd', '.psi')
     for s in suffixes:
         # create an empty file and close the handle immediately
         with open(self.cfg.sequence_db + s, 'w'):
             pass
     self.assertRaises(RuntimeError, idx.find_hmmer_indexes)
Пример #29
0
 def test_find_hmmer_indexes_all_files_and_2virtual(self):
     """
     With two numbered files for every suffix, ``.pal`` included,
     ``find_hmmer_indexes`` must raise a RuntimeError.
     """
     idx = Indexes(self.cfg)
     # test numbered index files + pal
     suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq', '.pal')
     for s in suffixes:
         for i in range(2):
             # create an empty file and close the handle immediately
             with open(self.cfg.sequence_db + str(i) + s, 'w'):
                 pass
     self.assertRaises(RuntimeError, idx.find_hmmer_indexes)
    def setUp(self):
        """
        Build the fixture: the configuration on a copy of the test dataset,
        the profile registry and the sequence index.
        """
        # local import: only needed to locate the platform temporary directory
        import tempfile

        self.cfg = Config( hmmer_exe = "hmmsearch",
                           sequence_db = os.path.join(self._data_dir, "base", "test_base.fa"),
                           db_type = "gembase",
                           e_value_res = 1,
                           i_evalue_sel = 0.5,
                           def_dir = os.path.join(self._data_dir, 'DEF'),
                           # portable replacement for the hard-coded '/tmp'
                           res_search_dir = tempfile.gettempdir(),
                           res_search_suffix = ".search_hmm.out",
                           profile_dir = os.path.join(self._data_dir, 'profiles'),
                           profile_suffix = ".hmm",
                           res_extract_suffix = "",
                           log_level = 30,
                           # portable replacement for the hard-coded '/dev/null'
                           log_file = os.devnull
                           )
        # the index is built on a copy of the dataset located in the working directory
        shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
        self.cfg.options['sequence_db'] = os.path.join(self.cfg.working_dir, os.path.basename(self.cfg.sequence_db))
        self.profile_registry = ProfilesRegistry(self.cfg)

        idx = Indexes(self.cfg)
        idx._build_my_indexes()
Пример #31
0
    def setUp(self):
        """
        Build the fixture: the configuration on a copy of the test dataset,
        the profile registry and the sequence index.
        """
        # local import: only needed to locate the platform temporary directory
        import tempfile

        self.cfg = Config(hmmer_exe="hmmsearch",
                          sequence_db=os.path.join(self._data_dir, "base",
                                                   "test_base.fa"),
                          db_type="gembase",
                          e_value_res=1,
                          i_evalue_sel=0.5,
                          def_dir=os.path.join(self._data_dir, 'DEF'),
                          # portable replacement for the hard-coded '/tmp'
                          res_search_dir=tempfile.gettempdir(),
                          res_search_suffix=".search_hmm.out",
                          profile_dir=os.path.join(self._data_dir, 'profiles'),
                          profile_suffix=".hmm",
                          res_extract_suffix="",
                          log_level=30,
                          # portable replacement for the hard-coded '/dev/null'
                          log_file=os.devnull)
        # the index is built on a copy of the dataset located in the working directory
        shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
        self.cfg.options['sequence_db'] = os.path.join(
            self.cfg.working_dir, os.path.basename(self.cfg.sequence_db))
        self.profile_registry = ProfilesRegistry(self.cfg)

        idx = Indexes(self.cfg)
        idx._build_my_indexes()
Пример #32
0
    def setUp(self):
        """
        Reset the logging registry, then build the configuration on a copy
        of the test dataset, the profile registry and the sequence index.
        """
        root_logger = logging.getLogger()
        root_logger.manager.loggerDict.clear()

        data_dir = self._data_dir
        if platform.system() == 'Windows':
            null_device = 'NUL'
        else:
            null_device = '/dev/null'
        self.cfg = Config(
            hmmer_exe="hmmsearch",
            sequence_db=os.path.join(data_dir, "base", "test_base.fa"),
            db_type="gembase",
            e_value_res=1,
            i_evalue_sel=0.5,
            def_dir=os.path.join(data_dir, 'DEF'),
            res_search_dir=tempfile.gettempdir(),
            res_search_suffix=".search_hmm.out",
            profile_dir=os.path.join(data_dir, 'profiles'),
            profile_suffix=".hmm",
            res_extract_suffix="",
            log_level=30,
            log_file=null_device,
        )
        # the index is built on a copy of the dataset located in the working directory
        shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
        copied_db = os.path.join(self.cfg.working_dir,
                                 os.path.basename(self.cfg.sequence_db))
        self.cfg.options['sequence_db'] = copied_db
        self.profile_registry = ProfilesRegistry(self.cfg)

        Indexes(self.cfg)._build_my_indexes()
Пример #33
0
 def test_build_no_idx(self):
     """Build the indexes and check the paths reported by both finders."""
     # Switch the indexing executable to 'formatdb' only when
     # 'makeblastdb' is not found and 'formatdb' is.
     makeblastdb_missing = not which('makeblastdb')
     if makeblastdb_missing and which('formatdb'):
         self.cfg.options['index_db_exe'] = 'formatdb'

     indexer = Indexes(self.cfg)
     indexer.build()
     found_my_idx = indexer.find_my_indexes()
     found_hmmer_idx = indexer.find_hmmer_indexes()

     seq_db = self.cfg.sequence_db
     expected_my_idx = os.path.join(os.path.dirname(seq_db), indexer.name + ".idx")
     expected_hmmer_idx = [seq_db + ext for ext in ('.phr', '.pin', '.psd', '.psi', '.psq')]

     self.assertEqual(found_my_idx, expected_my_idx)
     self.assertEqual(found_hmmer_idx, expected_hmmer_idx)
Пример #34
0
    def test_build_my_indexes(self):
        """Indexing ``test_base_with_errors.fa`` must raise a MacsypyError.

        The sequence file is copied into a fresh scratch directory, a
        configuration is built on that copy, and the error message raised
        by ``_build_my_indexes`` is checked.
        """
        models_dir = self.find_data('models')

        # Start from an empty scratch directory.
        out_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_indexes')
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.makedirs(out_dir)

        broken_fasta = self.find_data("base", "test_base_with_errors.fa")
        shutil.copy(broken_fasta, out_dir)

        args = argparse.Namespace(
            db_type='gembase',
            e_value_res=1,
            i_evalue_sel=0.5,
            models_dir=models_dir,
            res_search_suffix='',
            log_level=30,
            out_dir=out_dir,
            sequence_db=os.path.join(out_dir, os.path.basename(broken_fasta)),
        )
        cfg = Config(MacsyDefaults(), args)

        indexer = Indexes(cfg)
        with self.assertRaises(MacsypyError) as ctx, self.catch_log():
            indexer._build_my_indexes()
        message = str(ctx.exception)
        self.assertTrue(message.startswith("unable to index the sequence dataset:"))
Пример #35
0
 def test_fill_my_db(self):
     """Fill the db built from the gspD hmmer output and check its content."""
     name = "gspD"
     core_gene = CoreGene(self.model_location, name, self.profile_factory)
     out_file = name + self.cfg.res_search_suffix()
     hmm_report = GembaseHMMReport(core_gene,
                                   os.path.join(self.cfg.working_dir(), out_file),
                                   self.cfg)
     my_index = Indexes(self.cfg).find_my_indexes()
     hmmer_output = self.find_data(os.path.join('hmm', 'gspD.search_hmm.out'))

     # _fill_my_db updates the dict returned by _build_my_db in place.
     filled_db = hmm_report._build_my_db(hmmer_output)
     hmm_report._fill_my_db(my_index, filled_db)

     expected_db = {
         'PSAE001c01_031420': (658, 73),
         'PSAE001c01_051090': (714, 75),
         'PSAE001c01_018920': (776, 71),
         'PSAE001c01_043580': (416, 74),
         'PSAE001c01_017350': (600, 70),
         'PSAE001c01_013980': (759, 69),
         'PSAE001c01_026600': (273, 72),
         'NC_xxxxx_xx_056141': (803, 141),
         'PSAE001c01_006940': (803, 68),
     }
     self.assertDictEqual(filled_db, expected_db)
Пример #36
0
 def test_build_with_idx(self):
     """Check that ``build()`` leaves already existing index files untouched.

     Empty placeholder files are created for every hmmer index suffix and
     for the ``.idx`` file; after ``build()`` all of them must still be
     empty.
     """
     if not which('makeblastdb') and which('formatdb'):
         self.cfg.options['index_db_exe'] = 'formatdb'
     # put fake hmmer indexes; each handle is closed right away so that no
     # file descriptor stays open while the indexes are built
     suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
     for s in suffixes:
         with open(self.cfg.sequence_db + s, 'w'):
             pass
     idx = Indexes(self.cfg)
     # put a fake (empty) index next to the sequence file as well
     fake_my_idx = os.path.join(os.path.dirname(self.cfg.sequence_db), idx.name + ".idx")
     with open(fake_my_idx, 'w'):
         pass
     idx.build()
     my_idx = idx.find_my_indexes()
     hmmer_idx = idx.find_hmmer_indexes()
     for f in hmmer_idx + [my_idx]:
         self.assertEqual(os.path.getsize(f), 0)
Пример #37
0
 def test_build_force(self):
     """Check that ``build(force=True)`` replaces existing index files.

     Empty placeholder files are created for every hmmer index suffix;
     after a forced build none of the index files found may be empty.
     """
     if not which('makeblastdb') and which('formatdb'):
         self.cfg.options['index_db_exe'] = 'formatdb'

     # put fake hmmer indexes; each handle is closed right away so that no
     # file descriptor stays open while the indexes are rebuilt
     suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
     for s in suffixes:
         with open(self.cfg.sequence_db + s, 'w'):
             pass
     idx = Indexes(self.cfg)
     idx.build(force=True)
     my_idx = idx.find_my_indexes()
     hmmer_idx = idx.find_hmmer_indexes()
     for f in hmmer_idx + [my_idx]:
         self.assertNotEqual(os.path.getsize(f), 0)
Пример #38
0
 def fake_init(obj, cfg):
     """Stand-in initializer: set the attributes on *obj* without loading data.

     The index path and the topology file are read from the enclosing
     test's ``self.cfg``, not from the *cfg* argument.
     """
     obj.cfg = cfg
     obj.sequence_idx = Indexes(self.cfg).find_my_indexes()
     obj.topology_file = self.cfg.topology_file
     obj._DB = {}
 def test_find_hmmer_indexes_no_files(self):
     """find_hmmer_indexes must report an empty list here."""
     # case tested: no index file (this test creates none)
     found = Indexes(self.cfg).find_hmmer_indexes()
     self.assertListEqual(found, [])
Пример #40
0
 def fake_init(obj, cfg):
     """Replacement initializer that only assigns attributes on *obj*.

     NOTE: the index lookup and ``topology_file`` use ``self.cfg`` from the
     enclosing scope rather than the *cfg* parameter.
     """
     obj.cfg = cfg
     my_index_path = Indexes(self.cfg).find_my_indexes()
     obj.sequence_idx = my_index_path
     obj.topology_file = self.cfg.topology_file
     obj._DB = {}
Пример #41
0
    def setUp(self):
        """Build a gembase configuration on ``test_base.fa``, store the
        reference gene tables and build the sequence index.

        The sequence file is copied into the configuration's working
        directory and ``sequence_db`` is re-pointed at that copy before the
        index is built.
        """
        self.cfg = Config(hmmer_exe="hmmsearch",
                          sequence_db=os.path.join(self._data_dir, "base",
                                                   "test_base.fa"),
                          db_type="gembase",
                          e_value_res=1,
                          i_evalue_sel=0.5,
                          def_dir=os.path.join(self._data_dir, 'DEF'),
                          res_search_dir='/tmp',
                          res_search_suffix=".search_hmm.out",
                          profile_dir=os.path.join(self._data_dir, 'profiles'),
                          profile_suffix=".hmm",
                          res_extract_suffix="",
                          log_level=30,
                          log_file='/dev/null')

        # Work on a private copy of the sequence file placed in the working
        # directory, and make the configuration point at that copy.
        shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
        self.cfg.options['sequence_db'] = os.path.join(
            self.cfg.working_dir, os.path.basename(self.cfg.sequence_db))

        # Reference table of 2-tuples of strings.
        # presumably (gene id suffix, sequence length) for replicon
        # ESCO030p01 -- TODO confirm against the tests that consume it.
        # NOTE(review): the last tuple ('000670', '108') appears twice --
        # confirm this duplication is intended.
        self.ESCO030p01_genes = [('000010', '886'), ('000020', '291'),
                                 ('000030', '656'), ('000040', '500'),
                                 ('000050', '407'), ('000060', '144'),
                                 ('000070', '183'), ('000080', '121'),
                                 ('000090', '199'), ('000100', '325'),
                                 ('000110', '425'), ('000120', '171'),
                                 ('000130', '277'), ('000140', '133'),
                                 ('000150', '108'), ('000160', '295'),
                                 ('000170', '273'), ('000180', '367'),
                                 ('000190', '573'), ('000200', '343'),
                                 ('000210', '295'), ('000220', '108'),
                                 ('000230', '117'), ('000240', '153'),
                                 ('000250', '479'), ('000260', '706'),
                                 ('000270', '998'), ('000280', '171'),
                                 ('000290', '108'), ('000300', '295'),
                                 ('000310', '165'), ('000320', '243'),
                                 ('000330', '295'), ('000340', '108'),
                                 ('000350', '1755'), ('000360', '248'),
                                 ('000370', '286'), ('000380', '186'),
                                 ('000390', '83'), ('000400', '153'),
                                 ('000410', '69'), ('000420', '295'),
                                 ('000430', '108'), ('000440', '145'),
                                 ('000450', '59'), ('000460', '124'),
                                 ('000470', '246'), ('000480', '325'),
                                 ('000490', '54'), ('000500', '95'),
                                 ('000510', '83'), ('000520', '56'),
                                 ('000530', '401'), ('000540', '320'),
                                 ('000550', '256'), ('000560', '73'),
                                 ('000570', '144'), ('000580', '258'),
                                 ('000590', '133'), ('000600', '140'),
                                 ('000610', '63'), ('000620', '138'),
                                 ('000630', '68'), ('000640', '169'),
                                 ('000650', '127'), ('000660', '295'),
                                 ('000670', '108'), ('000670', '108')]

        # Same layout as the table above, presumably for replicon
        # PSAE001c01 -- TODO confirm.
        # NOTE(review): the last tuple ('056440', '455') appears twice --
        # confirm this duplication is intended.
        self.PSAE001c01_genes = [('006940', '803'), ('013980', '759'),
                                 ('017350', '600'), ('018920', '776'),
                                 ('026600', '273'), ('031420', '658'),
                                 ('043580', '416'), ('051090', '714'),
                                 ('055870', '449'), ('055880', '447'),
                                 ('055890', '588'), ('055900', '292'),
                                 ('055910', '262'), ('055920', '166'),
                                 ('055930', '288'), ('055940', '194'),
                                 ('055950', '567'), ('055960', '188'),
                                 ('055970', '247'), ('055980', '252'),
                                 ('055990', '455'), ('056000', '450'),
                                 ('056010', '260'), ('056020', '246'),
                                 ('056030', '70'), ('056040', '133'),
                                 ('056050', '284'), ('056060', '585'),
                                 ('056070', '435'), ('056080', '342'),
                                 ('056090', '252'), ('056100', '122'),
                                 ('056110', '213'), ('056120', '400'),
                                 ('056130', '134'), ('056140', '138'),
                                 ('056150', '397'), ('056160', '298'),
                                 ('056170', '186'), ('056180', '445'),
                                 ('056190', '414'), ('056200', '132'),
                                 ('056210', '674'), ('056220', '319'),
                                 ('056230', '394'), ('056240', '207'),
                                 ('056250', '401'), ('056260', '611'),
                                 ('056270', '257'), ('056280', '169'),
                                 ('056290', '454'), ('056300', '141'),
                                 ('056310', '458'), ('056320', '286'),
                                 ('056330', '514'), ('056340', '178'),
                                 ('056350', '156'), ('056360', '85'),
                                 ('056370', '289'), ('056380', '126'),
                                 ('056390', '290'), ('056400', '262'),
                                 ('056410', '214'), ('056420', '630'),
                                 ('056430', '127'), ('056440', '455'),
                                 ('056440', '455')]

        # Build the sequence index for the copied sequence file.
        idx = Indexes(self.cfg)
        idx._build_my_indexes()
 def test_find_my_indexes(self):
     """``find_my_indexes`` returns None first, then the path once it exists.

     The expected index path is ``<sequence_db directory>/<idx.name>.idx``.
     """
     idx = Indexes(self.cfg)
     self.assertIsNone(idx.find_my_indexes())
     new_idx = os.path.join(os.path.dirname(self.cfg.sequence_db), idx.name + ".idx")
     # create an empty index file; close the handle right away instead of
     # leaking it until garbage collection
     with open(new_idx, 'w'):
         pass
     self.assertEqual(idx.find_my_indexes(), new_idx)