def test_build_my_indexes(self):
    """
    Check that ``_build_my_indexes`` raises a MacsypyError
    when the sequence file cannot be indexed.

    The output directory is recreated from scratch and the
    ``test_base_with_errors.fa`` dataset is copied into it before indexing.
    """
    out_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_indexes')

    args = argparse.Namespace()
    args.db_type = 'gembase'
    args.out_dir = out_dir

    # always start from a fresh, empty directory
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    faulty_fasta = self.find_data("base", "test_base_with_errors.fa")
    shutil.copy(faulty_fasta, out_dir)
    args.sequence_db = os.path.join(out_dir, os.path.basename(faulty_fasta))
    self.cfg = Config(MacsyDefaults(), args)

    idx = Indexes(self.cfg)
    with self.assertRaises(MacsypyError) as caught:
        # the directory for the index exists and is writable but
        # the sequence file is corrupted and cannot be read correctly
        with self.catch_log():
            idx._build_my_indexes(args.out_dir)
    error_msg = str(caught.exception)
    self.assertTrue(error_msg.startswith("unable to index the sequence dataset:"))
def test_fill_ordered_replicon_min_max(self):
    """
    Check ``RepliconDB._fill_ordered_min_max`` on the ``ordered_replicon_base`` dataset.

    A dedicated 'ordered_replicon' configuration is built, the dataset is
    copied into the working directory and indexed, then the single replicon
    stored in the database is checked (topology, min and max).
    """
    import tempfile  # local import: the temporary directory is no longer hard-coded

    # NOTE(review): presumably releases what setUp created before a new cfg is built
    self.tearDown()
    self.cfg = Config(hmmer_exe="hmmsearch",
                      sequence_db=os.path.join(self._data_dir, "base", "ordered_replicon_base"),
                      db_type="ordered_replicon",
                      e_value_res=1,
                      i_evalue_sel=0.5,
                      def_dir=os.path.join(self._data_dir, 'DEF'),
                      # portable replacements for the former '/tmp' and '/dev/null'
                      res_search_dir=tempfile.gettempdir(),
                      res_search_suffix=".search_hmm.out",
                      profile_dir=os.path.join(self._data_dir, 'profiles'),
                      profile_suffix=".hmm",
                      res_extract_suffix="",
                      log_level=30,
                      log_file=os.devnull)

    # work on a copy of the dataset located in the working directory
    shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
    self.cfg.options['sequence_db'] = os.path.join(
        self.cfg.working_dir, os.path.basename(self.cfg.sequence_db))

    idx = Indexes(self.cfg)
    idx._build_my_indexes()

    # replace the constructor so that the database starts empty
    RepliconDB.__init__ = self.fake_init
    db = RepliconDB(self.cfg)
    db._fill_ordered_min_max(self.cfg.replicon_topology)

    self.assertEqual(len(db._DB), 1)
    rep = db[RepliconDB.ordered_replicon_name]
    self.assertEqual(rep.topology, self.cfg.replicon_topology)
    self.assertEqual(rep.min, 1)
    self.assertEqual(rep.max, 52)
def setUp(self):
    """
    Build a 'gembase' configuration working in a fresh
    ``test_macsyfinder_Report`` directory, create the hmmer sub-directory,
    the model location, the profile factory and the sequence indexes.
    """
    models_dir = self.find_data('models')
    tmp_root = tempfile.gettempdir()
    out_dir = os.path.join(tmp_root, 'test_macsyfinder_Report')

    args = argparse.Namespace(db_type='gembase',
                              models_dir=models_dir,
                              res_search_dir=tmp_root,
                              log_level=30,
                              out_dir=out_dir)

    # start from an empty output directory
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)

    fasta = self.find_data("base", "test_base.fa")
    shutil.copy(fasta, out_dir)
    args.sequence_db = os.path.join(out_dir, os.path.basename(fasta))

    self.cfg = Config(MacsyDefaults(), args)
    hmmer_dir = os.path.join(self.cfg.out_dir(), self.cfg.hmmer_dir())
    os.mkdir(hmmer_dir)

    self.model_name = 'foo'
    model_path = os.path.join(args.models_dir, self.model_name)
    self.model_location = ModelLocation(path=model_path)

    # we need to reset the ProfileFactory
    # because it behaves like a singleton,
    # so other tests are influenced by ProfileFactory and its configuration
    # for instance search_genes gets a profile without hmmer_exe
    self.profile_factory = ProfileFactory(self.cfg)

    Indexes(self.cfg).build()
def parse(self) -> List[LightHit]:
    """
    Parse the raw hmmer output file and extract all the hits it contains,
    doing some basic computation (coverage profile) on the way.

    :return: the extracted hits, sorted by decreasing score
    """
    sequence_index = Indexes(self.cfg).build()
    hits_db = self._build_my_db(self._hmmer_raw_out)
    self._fill_my_db(sequence_index, hits_db)

    collected = []
    with open(self._hmmer_raw_out, 'r') as raw_out:
        i_evalue_sel = self.cfg.i_evalue_sel()
        coverage_threshold = self.cfg.coverage_profile()
        # the file is split in consecutive groups of lines by self._hit_start
        groups = (grouped[1] for grouped in groupby(raw_out, self._hit_start))
        # drop summary
        next(groups)
        # each iteration consumes two groups: one given to the header parser
        # and the following one given to the body parser
        for header in groups:
            hit_id = self._parse_hmm_header(header)
            seq_lg, position_hit = hits_db[hit_id]
            replicon_name = self._get_replicon_name(hit_id)
            body = next(groups)
            collected.extend(
                self._parse_hmm_body(hit_id,
                                     self.gene_profile_lg,
                                     seq_lg,
                                     coverage_threshold,
                                     replicon_name,
                                     position_hit,
                                     i_evalue_sel,
                                     body)
            )
    # best scores first
    collected.sort(key=lambda hit: -hit.score)
    return collected
def test_find_my_indexes(self):
    """
    Check that ``find_my_indexes`` returns None while no index file exists
    and the index path once a ``<name>.idx`` file sits next to the sequence file.
    """
    idx = Indexes(self.cfg)
    self.assertIsNone(idx.find_my_indexes())
    new_idx = os.path.join(os.path.dirname(self.cfg.sequence_db), idx.name + ".idx")
    # create an empty index file; the context manager closes the handle at once
    with open(new_idx, 'w'):
        pass
    self.assertEqual(idx.find_my_indexes(), new_idx)
def test_build_no_idx(self):
    """
    Check that ``build`` returns the path of the ``<name>.idx`` file
    located in the directory of the sequence file.
    """
    idx = Indexes(self.cfg)
    built_idx = idx.build()
    db_dir = os.path.dirname(self.cfg.sequence_db())
    expected_idx = os.path.join(db_dir, idx.name + ".idx")
    self.assertEqual(built_idx, expected_idx)
def setUp(self):
    """
    Create a fresh ``test_macsyfinder_search_genes`` temporary directory with
    a ``job_1`` output directory, then build the 'gembase' configuration,
    the model location, the macsyfinder index and the profile factory.
    """
    self.tmp_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_search_genes')
    # start from an empty temporary directory
    if os.path.exists(self.tmp_dir):
        shutil.rmtree(self.tmp_dir)
    os.mkdir(self.tmp_dir)

    job_dir = os.path.join(self.tmp_dir, 'job_1')
    args = argparse.Namespace(sequence_db=self.find_data("base", "test_base.fa"),
                              db_type='gembase',
                              models_dir=self.find_data('models'),
                              log_level=30,
                              out_dir=job_dir,
                              res_search_dir=job_dir)
    os.mkdir(args.out_dir)
    self.cfg = Config(MacsyDefaults(), args)

    self.model_name = 'foo'
    model_path = os.path.join(args.models_dir, self.model_name)
    self.model_location = ModelLocation(path=model_path)

    Indexes(self.cfg)._build_my_indexes()
    self.profile_factory = ProfileFactory(self.cfg)
def test_fill_my_db(self):
    """
    Check that ``HmmProfile._fill_my_db`` associates each sequence id found
    in the gspD hmmer output with the expected pair of values read from the index.
    """
    gene_name = "gspD"
    args = argparse.Namespace(db_type='gembase',
                              models_dir=self.find_data('models'),
                              log_level=30,
                              sequence_db=self.find_data("base", "test_base.fa"),
                              index_dir=self.tmpdir)
    cfg = Config(MacsyDefaults(), args)
    gspD_hmmer_path = self.find_data('hmm', 'gspD.search_hmm.out')

    macsyfinder_idx = Indexes(cfg).build()
    hmm_prof = macsyprofile.HmmProfile(gene_name, 596, gspD_hmmer_path, cfg)
    db = hmm_prof._build_my_db(gspD_hmmer_path)
    hmm_prof._fill_my_db(macsyfinder_idx, db)

    expected_db = {
        'PSAE001c01_031420': (658, 73),
        'PSAE001c01_051090': (714, 75),
        'PSAE001c01_018920': (776, 71),
        'PSAE001c01_043580': (416, 74),
        'PSAE001c01_017350': (600, 70),
        'PSAE001c01_013980': (759, 69),
        'PSAE001c01_026600': (273, 72),
        'NC_xxxxx_xx_056141': (803, 141),
        'PSAE001c01_006940': (803, 68),
    }
    self.assertDictEqual(db, expected_db)
def setUp(self):
    """
    Reset the registered loggers, attach a single null-device handler to the
    macsypy logger, then build the 'gembase' configuration on a copy of
    ``test_base.fa``, the profiles registry and the macsyfinder index.
    """
    root_logger = logging.getLogger()
    root_logger.manager.loggerDict.clear()

    # add only one handler to the macsypy logger
    from macsypy.gene import _log
    macsy_log = _log.parent
    if platform.system() == 'Windows':
        log_file = 'NUL'
    else:
        log_file = '/dev/null'
    macsy_log.addHandler(logging.FileHandler(log_file))

    data_dir = self._data_dir
    self.cfg = Config(
        hmmer_exe="hmmsearch",
        sequence_db=os.path.join(data_dir, "base", "test_base.fa"),
        db_type="gembase",
        e_value_res=1,
        i_evalue_sel=0.5,
        def_dir=os.path.join(data_dir, 'DEF'),
        res_search_dir=tempfile.gettempdir(),
        res_search_suffix=".search_hmm.out",
        profile_dir=os.path.join(data_dir, 'profiles'),
        profile_suffix=".hmm",
        res_extract_suffix="",
        log_level=30,
        log_file=log_file,
    )

    # work on a copy of the dataset located in the working directory
    original_db = self.cfg.sequence_db
    shutil.copy(original_db, self.cfg.working_dir)
    self.cfg.options['sequence_db'] = os.path.join(self.cfg.working_dir,
                                                   os.path.basename(original_db))

    self.profile_registry = ProfilesRegistry(self.cfg)
    Indexes(self.cfg)._build_my_indexes()
def test_fill_ordered_replicon_min_max(self):
    """
    Check ``RepliconDB._fill_ordered_min_max`` on the ``ordered_replicon_base`` dataset.

    A dedicated 'ordered_replicon' configuration is built, the dataset is
    copied into the working directory and indexed, then the single replicon
    stored in the database is checked (topology, min and max).
    """
    # NOTE(review): presumably releases what setUp created before a new cfg is built
    self.tearDown()

    data_dir = self._data_dir
    if platform.system() == 'Windows':
        null_device = 'NUL'
    else:
        null_device = '/dev/null'
    self.cfg = Config(hmmer_exe="hmmsearch",
                      sequence_db=os.path.join(data_dir, "base", "ordered_replicon_base"),
                      db_type="ordered_replicon",
                      e_value_res=1,
                      i_evalue_sel=0.5,
                      def_dir=os.path.join(data_dir, 'DEF'),
                      res_search_dir=tempfile.gettempdir(),
                      res_search_suffix=".search_hmm.out",
                      profile_dir=os.path.join(data_dir, 'profiles'),
                      profile_suffix=".hmm",
                      res_extract_suffix="",
                      log_level=30,
                      log_file=null_device)

    # work on a copy of the dataset located in the working directory
    original_db = self.cfg.sequence_db
    shutil.copy(original_db, self.cfg.working_dir)
    self.cfg.options['sequence_db'] = os.path.join(self.cfg.working_dir,
                                                   os.path.basename(original_db))

    Indexes(self.cfg)._build_my_indexes()

    # replace the constructor so that the database starts empty
    RepliconDB.__init__ = self.fake_init
    db = RepliconDB(self.cfg)
    db._fill_ordered_min_max(self.cfg.replicon_topology)

    self.assertEqual(len(db._DB), 1)
    replicon = db[RepliconDB.ordered_replicon_name]
    self.assertEqual(replicon.topology, self.cfg.replicon_topology)
    self.assertEqual(replicon.min, 1)
    self.assertEqual(replicon.max, 52)
def setUp(self):
    """
    Reset the registered loggers, attach a single null-device handler to the
    macsypy logger, then build the 'gembase' configuration on a copy of
    ``test_base.fa``, the profiles registry and the macsyfinder index.
    """
    logging.getLogger().manager.loggerDict.clear()

    # add only one handler to the macsypy logger
    from macsypy.gene import _log
    macsy_log = _log.parent
    on_windows = platform.system() == "Windows"
    log_file = "NUL" if on_windows else "/dev/null"
    null_handler = logging.FileHandler(log_file)
    macsy_log.addHandler(null_handler)

    base_fasta = os.path.join(self._data_dir, "base", "test_base.fa")
    def_dir = os.path.join(self._data_dir, "DEF")
    profile_dir = os.path.join(self._data_dir, "profiles")
    self.cfg = Config(hmmer_exe="hmmsearch",
                      sequence_db=base_fasta,
                      db_type="gembase",
                      e_value_res=1,
                      i_evalue_sel=0.5,
                      def_dir=def_dir,
                      res_search_dir=tempfile.gettempdir(),
                      res_search_suffix=".search_hmm.out",
                      profile_dir=profile_dir,
                      profile_suffix=".hmm",
                      res_extract_suffix="",
                      log_level=30,
                      log_file=log_file)

    # work on a copy of the dataset located in the working directory
    shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
    copied_name = os.path.basename(self.cfg.sequence_db)
    self.cfg.options["sequence_db"] = os.path.join(self.cfg.working_dir, copied_name)

    self.profile_registry = ProfilesRegistry(self.cfg)
    Indexes(self.cfg)._build_my_indexes()
def test_iter(self):
    """
    Check that iterating over an index which is not built raises a
    MacsypyError and that, once built, the iteration yields the expected
    3-item tuples in order.
    """
    idx = Indexes(self.cfg)

    # the index is not built yet: iteration must fail
    with self.assertRaises(MacsypyError) as ctx:
        next(iter(idx))
    self.assertEqual(str(ctx.exception), 'Build index before to use it.')

    idx.build()
    expected_idx = [
        ('VICH001.B.00001.C001_01359', 200, 1), ('VICH001.B.00001.C001_01360', 484, 2),
        ('VICH001.B.00001.C001_01361', 406, 3), ('VICH001.B.00001.C001_01390', 326, 4),
        ('VICH001.B.00001.C001_01391', 54, 5), ('VICH001.B.00001.C001_01392', 206, 6),
        ('VICH001.B.00001.C001_01393', 477, 7), ('VICH001.B.00001.C001_01394', 126, 8),
        ('VICH001.B.00001.C001_01395', 405, 9), ('VICH001.B.00001.C001_01396', 572, 10),
        ('VICH001.B.00001.C001_01397', 721, 11), ('VICH001.B.00001.C001_01398', 467, 12),
        ('VICH001.B.00001.C001_01399', 720, 13), ('VICH001.B.00001.C001_01400', 559, 14),
        ('VICH001.B.00001.C001_01401', 153, 15), ('VICH001.B.00001.C001_01402', 4558, 16),
        ('VICH001.B.00001.C001_01500', 120, 17), ('VICH001.B.00001.C001_01501', 344, 18),
        ('VICH001.B.00001.C001_01502', 478, 19), ('VICH001.B.00001.C001_01503', 724, 20),
        ('VICH001.B.00001.C001_01504', 309, 21), ('VICH001.B.00001.C001_01505', 390, 22),
        ('VICH001.B.00001.C001_01506', 419, 23), ('VICH001.B.00001.C001_01540', 353, 24),
        ('VICH001.B.00001.C001_01541', 229, 25), ('VICH001.B.00001.C001_01542', 267, 26),
        ('VICH001.B.00001.C001_01543', 328, 27), ('VICH001.B.00001.C001_01544', 258, 28),
        ('VICH001.B.00001.C001_01545', 228, 29), ('VICH001.B.00001.C001_01546', 538, 30),
        ('VICH001.B.00001.C001_01547', 77, 31), ('VICH001.B.00001.C001_01548', 476, 32),
        ('VICH001.B.00001.C001_01549', 324, 33), ('VICH001.B.00001.C001_01550', 387, 34),
        ('VICH001.B.00001.C001_01551', 382, 35), ('VICH001.B.00001.C001_01552', 149, 36),
        ('VICH001.B.00001.C001_01553', 319, 37), ('VICH001.B.00001.C001_01554', 237, 38),
        ('VICH001.B.00001.C001_01555', 74, 39), ('VICH001.B.00001.C001_01556', 362, 40),
        ('VICH001.B.00001.C001_01557', 170, 41), ('VICH001.B.00001.C001_01558', 77, 42),
        ('VICH001.B.00001.C001_01559', 296, 43), ('VICH001.B.00001.C001_01560', 405, 44),
        ('VICH001.B.00001.C001_01561', 182, 45), ('VICH001.B.00001.C001_01562', 445, 46),
        ('VICH001.B.00001.C001_01563', 212, 47), ('VICH001.B.00001.C001_01564', 387, 48),
        ('VICH001.B.00001.C001_01565', 414, 49),
    ]
    self.assertListEqual(list(idx), expected_idx)
def test_build_no_idx(self):
    """
    Check that ``build`` creates both the macsyfinder index and the five
    hmmer index files next to the sequence file when none exists.
    """
    # fall back on formatdb when makeblastdb is not available
    if not which('makeblastdb') and which('formatdb'):
        self.cfg.options['index_db_exe'] = 'formatdb'

    idx = Indexes(self.cfg)
    idx.build()
    my_idx = idx.find_my_indexes()
    hmmer_idx = idx.find_hmmer_indexes()

    db_dir = os.path.dirname(self.cfg.sequence_db)
    self.assertEqual(my_idx, os.path.join(db_dir, idx.name + ".idx"))

    expected_hmmer_idx = []
    for suffix in ('.phr', '.pin', '.psd', '.psi', '.psq'):
        expected_hmmer_idx.append(self.cfg.sequence_db + suffix)
    self.assertEqual(hmmer_idx, expected_hmmer_idx)
def test_find_hmmer_indexes_all_files(self):
    """
    Check that ``find_hmmer_indexes`` returns the five index files
    when all of them exist next to the sequence file.
    """
    idx = Indexes(self.cfg)
    suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
    files_2_find = []
    for s in suffixes:
        new_idx = self.cfg.sequence_db + s
        # create an empty file; the context manager closes the handle at once
        with open(new_idx, 'w'):
            pass
        files_2_find.append(new_idx)
    hmmer_idx = idx.find_hmmer_indexes()
    self.assertListEqual(hmmer_idx, files_2_find)
def setUp(self):
    """
    Reset the registered loggers, build the 'gembase' configuration on a copy
    of ``test_base.fa``, define the reference gene lists (pairs of strings)
    for ESCO030p01 and PSAE001c01 and build the macsyfinder index.
    """
    logging.getLogger().manager.loggerDict.clear()

    data_dir = self._data_dir
    null_device = 'NUL' if platform.system() == 'Windows' else '/dev/null'
    self.cfg = Config(hmmer_exe="hmmsearch",
                      sequence_db=os.path.join(data_dir, "base", "test_base.fa"),
                      db_type="gembase",
                      e_value_res=1,
                      i_evalue_sel=0.5,
                      def_dir=os.path.join(data_dir, 'DEF'),
                      res_search_dir=tempfile.gettempdir(),
                      res_search_suffix=".search_hmm.out",
                      profile_dir=os.path.join(data_dir, 'profiles'),
                      profile_suffix=".hmm",
                      res_extract_suffix="",
                      log_level=30,
                      log_file=null_device)

    # work on a copy of the dataset located in the working directory
    original_db = self.cfg.sequence_db
    shutil.copy(original_db, self.cfg.working_dir)
    self.cfg.options['sequence_db'] = os.path.join(self.cfg.working_dir,
                                                   os.path.basename(original_db))

    # NOTE(review): the last entry of each list is repeated in the original data; kept as is
    self.ESCO030p01_genes = [
        ('000010', '886'), ('000020', '291'), ('000030', '656'), ('000040', '500'),
        ('000050', '407'), ('000060', '144'), ('000070', '183'), ('000080', '121'),
        ('000090', '199'), ('000100', '325'), ('000110', '425'), ('000120', '171'),
        ('000130', '277'), ('000140', '133'), ('000150', '108'), ('000160', '295'),
        ('000170', '273'), ('000180', '367'), ('000190', '573'), ('000200', '343'),
        ('000210', '295'), ('000220', '108'), ('000230', '117'), ('000240', '153'),
        ('000250', '479'), ('000260', '706'), ('000270', '998'), ('000280', '171'),
        ('000290', '108'), ('000300', '295'), ('000310', '165'), ('000320', '243'),
        ('000330', '295'), ('000340', '108'), ('000350', '1755'), ('000360', '248'),
        ('000370', '286'), ('000380', '186'), ('000390', '83'), ('000400', '153'),
        ('000410', '69'), ('000420', '295'), ('000430', '108'), ('000440', '145'),
        ('000450', '59'), ('000460', '124'), ('000470', '246'), ('000480', '325'),
        ('000490', '54'), ('000500', '95'), ('000510', '83'), ('000520', '56'),
        ('000530', '401'), ('000540', '320'), ('000550', '256'), ('000560', '73'),
        ('000570', '144'), ('000580', '258'), ('000590', '133'), ('000600', '140'),
        ('000610', '63'), ('000620', '138'), ('000630', '68'), ('000640', '169'),
        ('000650', '127'), ('000660', '295'), ('000670', '108'), ('000670', '108'),
    ]
    self.PSAE001c01_genes = [
        ('006940', '803'), ('013980', '759'), ('017350', '600'), ('018920', '776'),
        ('026600', '273'), ('031420', '658'), ('043580', '416'), ('051090', '714'),
        ('055870', '449'), ('055880', '447'), ('055890', '588'), ('055900', '292'),
        ('055910', '262'), ('055920', '166'), ('055930', '288'), ('055940', '194'),
        ('055950', '567'), ('055960', '188'), ('055970', '247'), ('055980', '252'),
        ('055990', '455'), ('056000', '450'), ('056010', '260'), ('056020', '246'),
        ('056030', '70'), ('056040', '133'), ('056050', '284'), ('056060', '585'),
        ('056070', '435'), ('056080', '342'), ('056090', '252'), ('056100', '122'),
        ('056110', '213'), ('056120', '400'), ('056130', '134'), ('056140', '138'),
        ('056150', '397'), ('056160', '298'), ('056170', '186'), ('056180', '445'),
        ('056190', '414'), ('056200', '132'), ('056210', '674'), ('056220', '319'),
        ('056230', '394'), ('056240', '207'), ('056250', '401'), ('056260', '611'),
        ('056270', '257'), ('056280', '169'), ('056290', '454'), ('056300', '141'),
        ('056310', '458'), ('056320', '286'), ('056330', '514'), ('056340', '178'),
        ('056350', '156'), ('056360', '85'), ('056370', '289'), ('056380', '126'),
        ('056390', '290'), ('056400', '262'), ('056410', '214'), ('056420', '630'),
        ('056430', '127'), ('056440', '455'), ('056440', '455'),
    ]

    Indexes(self.cfg)._build_my_indexes()
def _fill_my_db(self, macsyfinder_idx: str, db: Dict) -> None:
    """
    Fill the dictionary with information on the matched sequences.

    For every sequence id of the index which is already a key of *db*,
    the value is replaced by the tuple (length, rank) read from the index.
    Ids of the index which are absent from *db* are ignored.

    :param macsyfinder_idx: the path of the macsyfinder index corresponding to the dataset.
                            It is not read here: the index is (re)built from ``self.cfg``.
    :type macsyfinder_idx: string
    :param db: the database containing all sequence ids of the hits. It is modified in place.
    :type db: dict
    """
    sequence_index = Indexes(self.cfg)
    sequence_index.build()
    for seq_id, seq_length, seq_rank in sequence_index:
        if seq_id not in db:
            # this sequence did not match: nothing to record
            continue
        db[seq_id] = (seq_length, seq_rank)
def test_build_not_writable(self):
    """
    Check that ``build`` raises an IOError with an explicit message
    when the directory of the sequence file is not writable.
    """
    # Skip test on Windows, since setting the folder permissions is not affecting files inside
    # in Singularity container tests are run as root and this test makes no sense
    # NOTE(review): no skip logic is visible in this method; presumably handled by a decorator
    idx = Indexes(self.cfg)
    idx_dir = os.path.dirname(self.cfg.sequence_db())
    # remove all permissions on the directory
    os.chmod(idx_dir, 0o000)
    try:
        with self.assertRaises(IOError) as ctx:
            with self.catch_log():
                idx.build()
        # raw string: '\(' is not a valid escape sequence in a regular string literal
        self.assertRegex(str(ctx.exception),
                         r"cannot build indexes, \(.+/test_macsyfinder_indexes\) is not writable")
    finally:
        # always restore the permissions so that the directory can be cleaned up
        os.chmod(idx_dir, 0o777)
def test_build_with_idx(self):
    """
    Check that ``build`` keeps the index files which already exist:
    fake (empty) indexes are created beforehand and must still be empty afterwards.
    """
    # fall back on formatdb when makeblastdb is not available
    if not which('makeblastdb') and which('formatdb'):
        self.cfg.options['index_db_exe'] = 'formatdb'
    # put fake hmmer indexes
    suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
    for s in suffixes:
        # the context manager closes the handle at once
        with open(self.cfg.sequence_db + s, 'w'):
            pass
    idx = Indexes(self.cfg)
    # put a fake macsyfinder index, closed before build() runs
    fake_my_idx = os.path.join(os.path.dirname(self.cfg.sequence_db), idx.name + ".idx")
    with open(fake_my_idx, 'w'):
        pass
    idx.build()
    my_idx = idx.find_my_indexes()
    hmmer_idx = idx.find_hmmer_indexes()
    for f in hmmer_idx + [my_idx]:
        self.assertEqual(os.path.getsize(f), 0)
def test_build_not_writable(self):
    """
    Check that ``build`` raises an IOError
    when the directory of the sequence file is not writable.
    """
    # Skip test on Windows, since setting the folder permissions is not affecting files inside
    # in Singularity container tests are run as root and this test makes no sense
    # NOTE(review): no skip logic is visible in this method; presumably handled by a decorator
    idx = Indexes(self.cfg)
    idx_dir = os.path.dirname(self.cfg.sequence_db)
    # '0o' octal literals are accepted by Python 2.6+ and Python 3 ('0777' is Python 2 only)
    os.chmod(idx_dir, 0o000)
    try:
        self.assertRaises(IOError, idx.build)
    finally:
        # always restore the permissions so that the directory can be cleaned up
        os.chmod(idx_dir, 0o777)
def test_find_hmmer_indexes_all_files_and_virtual(self):
    """
    Check that ``find_hmmer_indexes`` returns every index file when the
    index is split in two volumes and a '.pal' file is present.
    """
    idx = Indexes(self.cfg)
    # test several index volumes + pal
    suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
    files_2_find = []
    for s in suffixes:
        for i in range(2):
            # NOTE(review): s already starts with '.', so the name contains '..'; kept as is
            new_idx = "%s.%d.%s" % (self.cfg.sequence_db, i, s)
            # create an empty file; the context manager closes the handle at once
            with open(new_idx, 'w'):
                pass
            files_2_find.append(new_idx)
    new_idx = self.cfg.sequence_db + '.pal'
    with open(new_idx, 'w'):
        pass
    files_2_find.append(new_idx)
    # compare regardless of the order in which the files are found
    files_2_find.sort()
    hmmer_idx = idx.find_hmmer_indexes()
    hmmer_idx.sort()
    self.assertListEqual(hmmer_idx, files_2_find)
def test_find_hmmer_indexes_all_files_and_virtual(self):
    """
    Check that ``find_hmmer_indexes`` returns every index file when the
    index is split in two volumes and a '.pal' file is present.
    """
    idx = Indexes(self.cfg)
    # test several index volumes + pal
    suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
    files_2_find = []
    for s in suffixes:
        for i in range(2):
            # NOTE(review): s already starts with '.', so the name contains '..'; kept as is
            new_idx = "{0}.{1:d}.{2}".format(self.cfg.sequence_db, i, s)
            # create an empty file; the context manager closes the handle at once
            with open(new_idx, 'w'):
                pass
            files_2_find.append(new_idx)
    new_idx = self.cfg.sequence_db + '.pal'
    with open(new_idx, 'w'):
        pass
    files_2_find.append(new_idx)
    # compare regardless of the order in which the files are found
    files_2_find.sort()
    hmmer_idx = idx.find_hmmer_indexes()
    hmmer_idx.sort()
    self.assertListEqual(hmmer_idx, files_2_find)
def test_find_hmmer_indexes_all_files_and_pal(self):
    """
    Check that ``find_hmmer_indexes`` raises a RuntimeError when a '.pal'
    file exists along with a single (non split) set of index files.
    """
    idx = Indexes(self.cfg)
    # test all files + pal
    suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq', '.pal')
    for s in suffixes:
        # create an empty file; the context manager closes the handle at once
        with open(self.cfg.sequence_db + s, 'w'):
            pass
    self.assertRaises(RuntimeError, idx.find_hmmer_indexes)
def test_find_hmmer_indexes_lack_pal(self):
    """
    Check that ``find_hmmer_indexes`` raises a RuntimeError when several
    sets of index files exist but there is no '.pal' file.
    """
    idx = Indexes(self.cfg)
    # test several indexes without pal
    suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
    for s in suffixes:
        for i in range(2):
            # create an empty file; the context manager closes the handle at once
            with open(self.cfg.sequence_db + str(i) + s, 'w'):
                pass
    self.assertRaises(RuntimeError, idx.find_hmmer_indexes)
def test_fill_gembase_min_max_oredered_replicon(self):
    """
    Check that ``_fill_gembase_min_max`` raises a MacsypyError with an
    explicit message when it is run on the ``ordered_replicon_base.fasta`` dataset.
    """
    source_fasta = self.find_data("base", "ordered_replicon_base.fasta")
    shutil.copy(source_fasta, self.args.out_dir)
    self.args.sequence_db = os.path.join(self.args.out_dir, os.path.basename(source_fasta))
    cfg = Config(MacsyDefaults(), self.args)

    Indexes(cfg).build()

    # replace the constructor before instantiating the database
    RepliconDB.__init__ = self.fake_init
    db = RepliconDB(cfg)

    with self.assertRaises(MacsypyError) as ctx:
        with self.catch_log():
            db._fill_gembase_min_max({}, self.cfg.replicon_topology())
    expected_msg = (f"Error during sequence-db '{self.args.sequence_db}' parsing. "
                    f"Are you sure db-type is 'gembase'?")
    self.assertEqual(str(ctx.exception), expected_msg)
def test_fill_ordered_replicon_min_max(self):
    """
    Check ``RepliconDB._fill_ordered_min_max`` on the
    ``ordered_replicon_base.fasta`` dataset: a single replicon is stored,
    with the configured topology, min 1 and max 52.
    """
    source_fasta = self.find_data("base", "ordered_replicon_base.fasta")
    shutil.copy(source_fasta, self.args.out_dir)
    self.args.sequence_db = os.path.join(self.args.out_dir, os.path.basename(source_fasta))
    cfg = Config(MacsyDefaults(), self.args)

    Indexes(cfg).build()

    # replace the constructor before instantiating the database
    RepliconDB.__init__ = self.fake_init
    db = RepliconDB(cfg)
    db._fill_ordered_min_max(cfg.replicon_topology())

    self.assertEqual(len(db._DB), 1)
    replicon = db[RepliconDB.ordered_replicon_name]
    self.assertEqual(replicon.topology, cfg.replicon_topology())
    self.assertEqual(replicon.min, 1)
    self.assertEqual(replicon.max, 52)
def test_build_not_writable(self):
    """
    Check that ``build`` raises an IOError
    when the directory of the sequence file is not writable.
    """
    idx = Indexes(self.cfg)
    idx_dir = os.path.dirname(self.cfg.sequence_db)
    # Skip test on Windows, since setting the folder permissions is not affecting files inside
    if platform.system() != 'Windows':
        # '0o' octal literals are accepted by Python 2.6+ and Python 3 ('0777' is Python 2 only)
        os.chmod(idx_dir, 0o000)
        try:
            self.assertRaises(IOError, idx.build)
        finally:
            # always restore the permissions so that the directory can be cleaned up
            os.chmod(idx_dir, 0o777)
def test_find_hmmer_indexes_some_files(self):
    """
    Check that ``find_hmmer_indexes`` raises a RuntimeError
    when only some of the index files exist ('.psq' is missing).
    """
    idx = Indexes(self.cfg)
    # test with not all files
    suffixes = ('.phr', '.pin', '.psd', '.psi')
    for s in suffixes:
        # create an empty file; the context manager closes the handle at once
        with open(self.cfg.sequence_db + s, 'w'):
            pass
    self.assertRaises(RuntimeError, idx.find_hmmer_indexes)
def test_find_hmmer_indexes_all_files_and_2virtual(self):
    """
    Check that ``find_hmmer_indexes`` raises a RuntimeError when two
    files exist for every suffix, including two '.pal' files.
    """
    idx = Indexes(self.cfg)
    # test index files + pal, each of them twice
    suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq', '.pal')
    for s in suffixes:
        for i in range(2):
            # create an empty file; the context manager closes the handle at once
            with open(self.cfg.sequence_db + str(i) + s, 'w'):
                pass
    self.assertRaises(RuntimeError, idx.find_hmmer_indexes)
def setUp(self):
    """
    Build the 'gembase' configuration on a copy of ``test_base.fa``
    located in the working directory, the profiles registry and the macsyfinder index.
    """
    import tempfile  # local import: the temporary directory is no longer hard-coded

    self.cfg = Config(hmmer_exe="hmmsearch",
                      sequence_db=os.path.join(self._data_dir, "base", "test_base.fa"),
                      db_type="gembase",
                      e_value_res=1,
                      i_evalue_sel=0.5,
                      def_dir=os.path.join(self._data_dir, 'DEF'),
                      # portable replacements for the former '/tmp' and '/dev/null'
                      res_search_dir=tempfile.gettempdir(),
                      res_search_suffix=".search_hmm.out",
                      profile_dir=os.path.join(self._data_dir, 'profiles'),
                      profile_suffix=".hmm",
                      res_extract_suffix="",
                      log_level=30,
                      log_file=os.devnull)

    # work on a copy of the dataset located in the working directory
    shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
    self.cfg.options['sequence_db'] = os.path.join(self.cfg.working_dir,
                                                   os.path.basename(self.cfg.sequence_db))

    self.profile_registry = ProfilesRegistry(self.cfg)
    idx = Indexes(self.cfg)
    idx._build_my_indexes()
def setUp(self):
    """
    Build the 'gembase' configuration on a copy of ``test_base.fa``
    located in the working directory, the profiles registry and the macsyfinder index.
    """
    import tempfile  # local import: the temporary directory is no longer hard-coded

    data_dir = self._data_dir
    self.cfg = Config(hmmer_exe="hmmsearch",
                      sequence_db=os.path.join(data_dir, "base", "test_base.fa"),
                      db_type="gembase",
                      e_value_res=1,
                      i_evalue_sel=0.5,
                      def_dir=os.path.join(data_dir, 'DEF'),
                      # portable replacements for the former '/tmp' and '/dev/null'
                      res_search_dir=tempfile.gettempdir(),
                      res_search_suffix=".search_hmm.out",
                      profile_dir=os.path.join(data_dir, 'profiles'),
                      profile_suffix=".hmm",
                      res_extract_suffix="",
                      log_level=30,
                      log_file=os.devnull)

    # work on a copy of the dataset located in the working directory
    shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
    self.cfg.options['sequence_db'] = os.path.join(
        self.cfg.working_dir, os.path.basename(self.cfg.sequence_db))

    self.profile_registry = ProfilesRegistry(self.cfg)
    idx = Indexes(self.cfg)
    idx._build_my_indexes()
def setUp(self):
    """
    Reset the registered loggers, then build the 'gembase' configuration on
    a copy of ``test_base.fa`` located in the working directory,
    the profiles registry and the macsyfinder index.
    """
    logging.getLogger().manager.loggerDict.clear()

    data_dir = self._data_dir
    if platform.system() == 'Windows':
        null_device = 'NUL'
    else:
        null_device = '/dev/null'
    self.cfg = Config(hmmer_exe="hmmsearch",
                      sequence_db=os.path.join(data_dir, "base", "test_base.fa"),
                      db_type="gembase",
                      e_value_res=1,
                      i_evalue_sel=0.5,
                      def_dir=os.path.join(data_dir, 'DEF'),
                      res_search_dir=tempfile.gettempdir(),
                      res_search_suffix=".search_hmm.out",
                      profile_dir=os.path.join(data_dir, 'profiles'),
                      profile_suffix=".hmm",
                      res_extract_suffix="",
                      log_level=30,
                      log_file=null_device)

    # work on a copy of the dataset located in the working directory
    original_db = self.cfg.sequence_db
    shutil.copy(original_db, self.cfg.working_dir)
    self.cfg.options['sequence_db'] = os.path.join(self.cfg.working_dir,
                                                   os.path.basename(original_db))

    self.profile_registry = ProfilesRegistry(self.cfg)
    Indexes(self.cfg)._build_my_indexes()
def test_build_no_idx(self):
    """
    Check that ``build`` creates both the macsyfinder index and the five
    hmmer index files next to the sequence file when none exists.
    """
    makeblastdb_missing = not which('makeblastdb')
    # fall back on formatdb when makeblastdb is not available
    if makeblastdb_missing and which('formatdb'):
        self.cfg.options['index_db_exe'] = 'formatdb'

    idx = Indexes(self.cfg)
    idx.build()
    my_idx = idx.find_my_indexes()
    hmmer_idx = idx.find_hmmer_indexes()

    expected_my_idx = os.path.join(os.path.dirname(self.cfg.sequence_db), idx.name + ".idx")
    self.assertEqual(my_idx, expected_my_idx)

    hmmer_suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
    expected_hmmer_idx = [self.cfg.sequence_db + suffix for suffix in hmmer_suffixes]
    self.assertEqual(hmmer_idx, expected_hmmer_idx)
def test_build_my_indexes(self):
    """
    Check that ``_build_my_indexes`` raises a MacsypyError
    when it is run on the ``test_base_with_errors.fa`` dataset.
    """
    out_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_indexes')
    args = argparse.Namespace(db_type='gembase',
                              e_value_res=1,
                              i_evalue_sel=0.5,
                              models_dir=self.find_data('models'),
                              res_search_suffix='',
                              log_level=30,
                              out_dir=out_dir)

    # always start from a fresh, empty directory
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    faulty_fasta = self.find_data("base", "test_base_with_errors.fa")
    shutil.copy(faulty_fasta, out_dir)
    args.sequence_db = os.path.join(out_dir, os.path.basename(faulty_fasta))

    cfg = Config(MacsyDefaults(), args)
    idx = Indexes(cfg)
    with self.assertRaises(MacsypyError) as caught:
        with self.catch_log():
            idx._build_my_indexes()
    error_msg = str(caught.exception)
    self.assertTrue(error_msg.startswith("unable to index the sequence dataset:"))
def test_fill_my_db(self):
    """
    Check that ``GembaseHMMReport._fill_my_db`` associates each sequence id
    found in the gspD hmmer output with the expected pair of values read from the index.
    """
    gene_name = "gspD"
    c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
    report_name = gene_name + self.cfg.res_search_suffix()
    report_path = os.path.join(self.cfg.working_dir(), report_name)
    report = GembaseHMMReport(c_gene, report_path, self.cfg)

    macsyfinder_idx = Indexes(self.cfg).find_my_indexes()
    gspD_hmmer_path = self.find_data(os.path.join('hmm', 'gspD.search_hmm.out'))

    db = report._build_my_db(gspD_hmmer_path)
    report._fill_my_db(macsyfinder_idx, db)

    expected_db = {
        'PSAE001c01_031420': (658, 73),
        'PSAE001c01_051090': (714, 75),
        'PSAE001c01_018920': (776, 71),
        'PSAE001c01_043580': (416, 74),
        'PSAE001c01_017350': (600, 70),
        'PSAE001c01_013980': (759, 69),
        'PSAE001c01_026600': (273, 72),
        'NC_xxxxx_xx_056141': (803, 141),
        'PSAE001c01_006940': (803, 68),
    }
    self.assertDictEqual(db, expected_db)
def test_build_with_idx(self):
    """
    Check that ``build`` keeps the index files which already exist:
    fake (empty) indexes are created beforehand and must still be empty afterwards.
    """
    # fall back on formatdb when makeblastdb is not available
    if not which('makeblastdb') and which('formatdb'):
        self.cfg.options['index_db_exe'] = 'formatdb'
    # put fake hmmer indexes
    suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
    for s in suffixes:
        # the context manager closes the handle at once
        with open(self.cfg.sequence_db + s, 'w'):
            pass
    idx = Indexes(self.cfg)
    # put a fake macsyfinder index, closed before build() runs
    fake_my_idx = os.path.join(os.path.dirname(self.cfg.sequence_db), idx.name + ".idx")
    with open(fake_my_idx, 'w'):
        pass
    idx.build()
    my_idx = idx.find_my_indexes()
    hmmer_idx = idx.find_hmmer_indexes()
    for f in hmmer_idx + [my_idx]:
        self.assertEqual(os.path.getsize(f), 0)
def test_build_force(self):
    """
    Check that ``build(force=True)`` rebuilds the indexes even if index files
    already exist: the fake (empty) hmmer indexes must not be empty afterwards.
    """
    # fall back on formatdb when makeblastdb is not available
    if not which('makeblastdb') and which('formatdb'):
        self.cfg.options['index_db_exe'] = 'formatdb'
    # put fake hmmer indexes
    suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
    for s in suffixes:
        # the context manager closes the handle at once
        with open(self.cfg.sequence_db + s, 'w'):
            pass
    idx = Indexes(self.cfg)
    idx.build(force=True)
    my_idx = idx.find_my_indexes()
    hmmer_idx = idx.find_hmmer_indexes()
    for f in hmmer_idx + [my_idx]:
        self.assertNotEqual(os.path.getsize(f), 0)
def fake_init(obj, cfg):
    """
    Minimal initializer: set the attributes on *obj* and leave ``_DB`` empty.

    NOTE(review): ``self`` is not a parameter, it is taken from the enclosing
    scope; the index and the topology file are read from ``self.cfg``,
    not from the *cfg* argument.

    :param obj: the object to initialize
    :param cfg: the configuration stored in ``obj.cfg``
    """
    obj.cfg = cfg
    obj.sequence_idx = Indexes(self.cfg).find_my_indexes()
    obj.topology_file = self.cfg.topology_file
    # start with an empty database
    obj._DB = {}
def test_find_hmmer_indexes_no_files(self):
    """
    Check that ``find_hmmer_indexes`` returns an empty list
    when no index file has been created.
    """
    # test with no file
    found_indexes = Indexes(self.cfg).find_hmmer_indexes()
    self.assertListEqual(found_indexes, [])
def setUp(self):
    """
    Build the 'gembase' configuration on a copy of ``test_base.fa``, define
    the reference gene lists (pairs of strings) for ESCO030p01 and
    PSAE001c01 and build the macsyfinder index.
    """
    import tempfile  # local import: the temporary directory is no longer hard-coded

    data_dir = self._data_dir
    self.cfg = Config(hmmer_exe="hmmsearch",
                      sequence_db=os.path.join(data_dir, "base", "test_base.fa"),
                      db_type="gembase",
                      e_value_res=1,
                      i_evalue_sel=0.5,
                      def_dir=os.path.join(data_dir, 'DEF'),
                      # portable replacements for the former '/tmp' and '/dev/null'
                      res_search_dir=tempfile.gettempdir(),
                      res_search_suffix=".search_hmm.out",
                      profile_dir=os.path.join(data_dir, 'profiles'),
                      profile_suffix=".hmm",
                      res_extract_suffix="",
                      log_level=30,
                      log_file=os.devnull)

    # work on a copy of the dataset located in the working directory
    shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
    self.cfg.options['sequence_db'] = os.path.join(
        self.cfg.working_dir, os.path.basename(self.cfg.sequence_db))

    # NOTE(review): the last entry of each list is repeated in the original data; kept as is
    self.ESCO030p01_genes = [
        ('000010', '886'), ('000020', '291'), ('000030', '656'), ('000040', '500'),
        ('000050', '407'), ('000060', '144'), ('000070', '183'), ('000080', '121'),
        ('000090', '199'), ('000100', '325'), ('000110', '425'), ('000120', '171'),
        ('000130', '277'), ('000140', '133'), ('000150', '108'), ('000160', '295'),
        ('000170', '273'), ('000180', '367'), ('000190', '573'), ('000200', '343'),
        ('000210', '295'), ('000220', '108'), ('000230', '117'), ('000240', '153'),
        ('000250', '479'), ('000260', '706'), ('000270', '998'), ('000280', '171'),
        ('000290', '108'), ('000300', '295'), ('000310', '165'), ('000320', '243'),
        ('000330', '295'), ('000340', '108'), ('000350', '1755'), ('000360', '248'),
        ('000370', '286'), ('000380', '186'), ('000390', '83'), ('000400', '153'),
        ('000410', '69'), ('000420', '295'), ('000430', '108'), ('000440', '145'),
        ('000450', '59'), ('000460', '124'), ('000470', '246'), ('000480', '325'),
        ('000490', '54'), ('000500', '95'), ('000510', '83'), ('000520', '56'),
        ('000530', '401'), ('000540', '320'), ('000550', '256'), ('000560', '73'),
        ('000570', '144'), ('000580', '258'), ('000590', '133'), ('000600', '140'),
        ('000610', '63'), ('000620', '138'), ('000630', '68'), ('000640', '169'),
        ('000650', '127'), ('000660', '295'), ('000670', '108'), ('000670', '108'),
    ]
    self.PSAE001c01_genes = [
        ('006940', '803'), ('013980', '759'), ('017350', '600'), ('018920', '776'),
        ('026600', '273'), ('031420', '658'), ('043580', '416'), ('051090', '714'),
        ('055870', '449'), ('055880', '447'), ('055890', '588'), ('055900', '292'),
        ('055910', '262'), ('055920', '166'), ('055930', '288'), ('055940', '194'),
        ('055950', '567'), ('055960', '188'), ('055970', '247'), ('055980', '252'),
        ('055990', '455'), ('056000', '450'), ('056010', '260'), ('056020', '246'),
        ('056030', '70'), ('056040', '133'), ('056050', '284'), ('056060', '585'),
        ('056070', '435'), ('056080', '342'), ('056090', '252'), ('056100', '122'),
        ('056110', '213'), ('056120', '400'), ('056130', '134'), ('056140', '138'),
        ('056150', '397'), ('056160', '298'), ('056170', '186'), ('056180', '445'),
        ('056190', '414'), ('056200', '132'), ('056210', '674'), ('056220', '319'),
        ('056230', '394'), ('056240', '207'), ('056250', '401'), ('056260', '611'),
        ('056270', '257'), ('056280', '169'), ('056290', '454'), ('056300', '141'),
        ('056310', '458'), ('056320', '286'), ('056330', '514'), ('056340', '178'),
        ('056350', '156'), ('056360', '85'), ('056370', '289'), ('056380', '126'),
        ('056390', '290'), ('056400', '262'), ('056410', '214'), ('056420', '630'),
        ('056430', '127'), ('056440', '455'), ('056440', '455'),
    ]

    idx = Indexes(self.cfg)
    idx._build_my_indexes()