示例#1
0
 def test_build_no_idx(self):
     """
     Build the indexes when none exist yet, then check that
     ``find_my_indexes`` and ``find_hmmer_indexes`` report the expected paths.
     """
     # use 'formatdb' only when 'makeblastdb' is not available but 'formatdb' is
     makeblastdb_missing = not which('makeblastdb')
     if makeblastdb_missing and which('formatdb'):
         self.cfg.options['index_db_exe'] = 'formatdb'

     indexes = Indexes(self.cfg)
     indexes.build()
     found_my_idx = indexes.find_my_indexes()
     found_hmmer_idx = indexes.find_hmmer_indexes()

     expected_my_idx = os.path.join(os.path.dirname(self.cfg.sequence_db), indexes.name + ".idx")
     expected_hmmer_idx = [self.cfg.sequence_db + ext for ext in ('.phr', '.pin', '.psd', '.psi', '.psq')]
     self.assertEqual(found_my_idx, expected_my_idx)
     self.assertEqual(found_hmmer_idx, expected_hmmer_idx)
示例#2
0
 def test_build_force(self):
     """
     Create empty placeholder index files, call ``build(force=True)`` and
     check that every index file found afterwards is non-empty.
     """
     if not which('makeblastdb') and which('formatdb'):
         self.cfg.options['index_db_exe'] = 'formatdb'

     # put fake (empty) hmmer indexes
     for s in ('.phr', '.pin', '.psd', '.psi', '.psq'):
         # close each placeholder right away so that no handle stays open
         # while the indexes are rebuilt
         with open(self.cfg.sequence_db + s, 'w'):
             pass
     idx = Indexes(self.cfg)
     idx.build(force=True)
     my_idx = idx.find_my_indexes()
     hmmer_idx = idx.find_hmmer_indexes()
     for f in hmmer_idx + [my_idx]:
         self.assertNotEqual(os.path.getsize(f), 0)
示例#3
0
 def test_build_with_idx(self):
     """
     Create empty placeholder files for every index, call ``build()`` without
     forcing and check that every index file found afterwards is still empty.
     """
     if not which('makeblastdb') and which('formatdb'):
         self.cfg.options['index_db_exe'] = 'formatdb'
     # put fake (empty) hmmer indexes
     for s in ('.phr', '.pin', '.psd', '.psi', '.psq'):
         # close each placeholder right away, no handle must stay open
         with open(self.cfg.sequence_db + s, 'w'):
             pass
     idx = Indexes(self.cfg)
     # put a fake (empty) '.idx' file next to the sequence db
     with open(os.path.join(os.path.dirname(self.cfg.sequence_db), idx.name + ".idx"), 'w'):
         pass
     idx.build()
     my_idx = idx.find_my_indexes()
     hmmer_idx = idx.find_hmmer_indexes()
     for f in hmmer_idx + [my_idx]:
         self.assertEqual(os.path.getsize(f), 0)
示例#4
0
class Test(MacsyTest):

    def __init__(self, methodName = 'runTest'):
        """
        Initialise the test case and keep two initialisers at hand:
        a minimal replacement (``fake_init``) and the genuine
        ``Indexes.__init__`` (``real_init``).

        :param methodName: name of the test method, forwarded to the parent class.
        """
        super(Test, self).__init__(methodName)

        def fake_init(obj, cfg):
            # only set cfg, _fasta_path and name, all taken from the config
            sequence_db = cfg.sequence_db
            obj.cfg = cfg
            obj._fasta_path = sequence_db
            obj.name = os.path.basename(sequence_db)

        self.real_init = Indexes.__init__
        self.fake_init = fake_init

    def setUp(self):
        """
        Reset the logger registry, attach a null-device handler to the parent
        of the ``macsypy.database`` logger, build the test configuration and
        make the configuration point to a copy of the sequence db placed in
        the working directory.
        """
        logging.getLogger().manager.loggerDict.clear()

        # add only one handler to the macsypy logger
        from macsypy.database import _log
        if platform.system() == 'Windows':
            log_file = 'NUL'
        else:
            log_file = '/dev/null'
        _log.parent.addHandler(logging.FileHandler(log_file))

        data_dir = self._data_dir
        self.cfg = Config(hmmer_exe="hmmsearch",
                          sequence_db=os.path.join(data_dir, "base", "test_base.fa"),
                          db_type="gembase",
                          e_value_res=1,
                          i_evalue_sel=0.5,
                          def_dir=os.path.join(data_dir, "DEF"),
                          res_search_dir=tempfile.gettempdir(),
                          res_search_suffix=".search_hmm.out",
                          profile_dir=os.path.join(data_dir, "profiles"),
                          profile_suffix=".hmm",
                          res_extract_suffix="",
                          log_level=30,
                          log_file=log_file
                          )

        # work on a copy of the sequence db located in the working directory
        source_db = self.cfg.sequence_db
        work_dir = self.cfg.working_dir
        shutil.copy(source_db, work_dir)
        self.cfg.options['sequence_db'] = os.path.join(work_dir, os.path.basename(source_db))


    def tearDown(self):
        """
        Shut the logging system down, reset the logger registry and remove
        the working directory (best effort).
        """
        # close loggers filehandles, so they don't block file deletion
        # in shutil.rmtree calls in Windows
        logging.shutdown()
        root_logger = logging.getLogger()
        root_logger.manager.loggerDict.clear()
        try:
            shutil.rmtree(self.cfg.working_dir)
        except OSError:
            # cleanup stays best effort: a directory which is missing or
            # cannot be removed must not fail the test, but unrelated
            # exceptions (e.g. KeyboardInterrupt) are no longer hidden
            pass

    def test_find_hmmer_indexes_no_files(self):
        """Without any index file created, ``find_hmmer_indexes`` must return an empty list."""
        indexes = Indexes(self.cfg)
        # test with no file at all
        self.assertListEqual(indexes.find_hmmer_indexes(), [])

    def test_find_hmmer_indexes_all_files(self):
        """
        With one placeholder file per index suffix, ``find_hmmer_indexes``
        must return exactly these files, in suffix order.
        """
        idx = Indexes(self.cfg)
        suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
        files_2_find = [self.cfg.sequence_db + s for s in suffixes]
        for new_idx in files_2_find:
            # create the empty file and close it right away (no leaked handle)
            with open(new_idx, 'w'):
                pass
        hmmer_idx = idx.find_hmmer_indexes()
        self.assertListEqual(hmmer_idx, files_2_find)


    def test_find_hmmer_indexes_all_files_and_pal(self):
        """
        With all index files plus a '.pal' file, ``find_hmmer_indexes``
        must raise a RuntimeError.
        """
        idx = Indexes(self.cfg)
        # test all the files + pal
        suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq', '.pal')
        for s in suffixes:
            # create the empty file and close it right away (no leaked handle)
            with open(self.cfg.sequence_db + s, 'w'):
                pass
        self.assertRaises(RuntimeError, idx.find_hmmer_indexes)


    def test_find_hmmer_indexes_some_files(self):
        """
        With only some of the index files present ('.psq' is missing),
        ``find_hmmer_indexes`` must raise a RuntimeError.
        """
        idx = Indexes(self.cfg)
        # test with not all the files
        suffixes = ('.phr', '.pin', '.psd', '.psi')
        for s in suffixes:
            # create the empty file and close it right away (no leaked handle)
            with open(self.cfg.sequence_db + s, 'w'):
                pass
        self.assertRaises(RuntimeError, idx.find_hmmer_indexes)


    def test_find_hmmer_indexes_lack_pal(self):
        """
        With two numbered sets of index files and no '.pal' file,
        ``find_hmmer_indexes`` must raise a RuntimeError.
        """
        idx = Indexes(self.cfg)
        # test several indexes, no pal
        suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
        for s in suffixes:
            for i in range(2):
                # create the empty file and close it right away (no leaked handle)
                with open(self.cfg.sequence_db + str(i) + s, 'w'):
                    pass
        self.assertRaises(RuntimeError, idx.find_hmmer_indexes)


    def test_find_hmmer_indexes_all_files_and_2virtual(self):
        """
        With two numbered sets of index files, each one including a '.pal'
        file, ``find_hmmer_indexes`` must raise a RuntimeError.
        """
        idx = Indexes(self.cfg)
        # test 2 sets of index files, each with its own pal
        suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq', '.pal')
        for s in suffixes:
            for i in range(2):
                # create the empty file and close it right away (no leaked handle)
                with open(self.cfg.sequence_db + str(i) + s, 'w'):
                    pass
        self.assertRaises(RuntimeError, idx.find_hmmer_indexes)


    def test_find_hmmer_indexes_all_files_and_virtual(self):
        """
        With two numbered sets of index files plus a single '.pal' file,
        ``find_hmmer_indexes`` must return all of them (compared after sorting).
        """
        idx = Indexes(self.cfg)
        # test indexes + pal
        suffixes = ('.phr', '.pin', '.psd', '.psi', '.psq')
        files_2_find = []
        for s in suffixes:
            for i in range(2):
                # NOTE: the suffix already starts with a dot, so the names
                # built here look like '<sequence_db>.0..phr'
                files_2_find.append("{0}.{1:d}.{2}".format(self.cfg.sequence_db, i, s))
        files_2_find.append(self.cfg.sequence_db + '.pal')
        for new_idx in files_2_find:
            # create the empty file and close it right away (no leaked handle)
            with open(new_idx, 'w'):
                pass
        files_2_find.sort()
        hmmer_idx = idx.find_hmmer_indexes()
        hmmer_idx.sort()
        self.assertListEqual(hmmer_idx, files_2_find)


    def test_find_my_indexes(self):
        """
        ``find_my_indexes`` must return None while no '.idx' file exists,
        and the path of the '.idx' file once it has been created.
        """
        idx = Indexes(self.cfg)
        self.assertIsNone(idx.find_my_indexes())
        new_idx = os.path.join(os.path.dirname(self.cfg.sequence_db), idx.name + ".idx")
        # create the empty file and close it right away (no leaked handle)
        with open(new_idx, 'w'):
            pass
        self.assertEqual(idx.find_my_indexes(), new_idx)

    @unittest.skipIf(not (which('makeblastdb') or which('formatdb')), 'neither makeblast nor formatdb found in PATH')
    def test_build_no_idx(self):
        """
        Build the indexes when none exist yet, then check that
        ``find_my_indexes`` and ``find_hmmer_indexes`` report the expected paths.
        """
        # use 'formatdb' only when 'makeblastdb' is not available but 'formatdb' is
        makeblastdb_missing = not which('makeblastdb')
        if makeblastdb_missing and which('formatdb'):
            self.cfg.options['index_db_exe'] = 'formatdb'

        indexes = Indexes(self.cfg)
        indexes.build()
        found_my_idx = indexes.find_my_indexes()
        found_hmmer_idx = indexes.find_hmmer_indexes()

        expected_my_idx = os.path.join(os.path.dirname(self.cfg.sequence_db), indexes.name + ".idx")
        expected_hmmer_idx = [self.cfg.sequence_db + ext for ext in ('.phr', '.pin', '.psd', '.psi', '.psq')]
        self.assertEqual(found_my_idx, expected_my_idx)
        self.assertEqual(found_hmmer_idx, expected_hmmer_idx)
        

    @unittest.skipIf(not (which('makeblastdb') or which('formatdb')), 'neither makeblast nor formatdb found in PATH')
    def test_build_with_idx(self):
        """
        Create empty placeholder files for every index, call ``build()`` without
        forcing and check that every index file found afterwards is still empty.
        """
        if not which('makeblastdb') and which('formatdb'):
            self.cfg.options['index_db_exe'] = 'formatdb'
        # put fake (empty) hmmer indexes
        for s in ('.phr', '.pin', '.psd', '.psi', '.psq'):
            # close each placeholder right away, no handle must stay open
            with open(self.cfg.sequence_db + s, 'w'):
                pass
        idx = Indexes(self.cfg)
        # put a fake (empty) '.idx' file next to the sequence db
        with open(os.path.join(os.path.dirname(self.cfg.sequence_db), idx.name + ".idx"), 'w'):
            pass
        idx.build()
        my_idx = idx.find_my_indexes()
        hmmer_idx = idx.find_hmmer_indexes()
        for f in hmmer_idx + [my_idx]:
            self.assertEqual(os.path.getsize(f), 0)

    @unittest.skipIf(not (which('makeblastdb') or which('formatdb')), 'neither makeblast nor formatdb found in PATH')
    def test_build_force(self):
        """
        Create empty placeholder index files, call ``build(force=True)`` and
        check that every index file found afterwards is non-empty.
        """
        if not which('makeblastdb') and which('formatdb'):
            self.cfg.options['index_db_exe'] = 'formatdb'

        # put fake (empty) hmmer indexes
        for s in ('.phr', '.pin', '.psd', '.psi', '.psq'):
            # close each placeholder right away so that no handle stays open
            # while the indexes are rebuilt
            with open(self.cfg.sequence_db + s, 'w'):
                pass
        idx = Indexes(self.cfg)
        idx.build(force=True)
        my_idx = idx.find_my_indexes()
        hmmer_idx = idx.find_hmmer_indexes()
        for f in hmmer_idx + [my_idx]:
            self.assertNotEqual(os.path.getsize(f), 0)
            
    @unittest.skipIf(platform.system() == 'Windows' or os.getuid() == 0, 'Skip test on Windows or if run as root')
    def test_build_not_writable(self):
        """
        Remove all permissions on the directory holding the sequence db and
        check that ``build()`` raises an IOError.
        """
        # Skip test on Windows, since setting the folder permissions is not affecting files inside.
        # In a Singularity container the tests are run as root, where this test makes no sense.
        idx = Indexes(self.cfg)
        idx_dir = os.path.dirname(self.cfg.sequence_db)
        # 0o... octal literals are understood by Python 2.6+ and by Python 3
        os.chmod(idx_dir, 0o000)
        try:
            self.assertRaises(IOError, idx.build)
        finally:
            # give the permissions back even when the assertion fails,
            # otherwise the working directory cannot be removed afterwards
            os.chmod(idx_dir, 0o777)
    def test_basic_run(self):
        """
        Run macsyfinder end to end and compare its json output with the reference.

        macsyfinder is launched to search T9SS T3SS T4SS_typeI systems
        with test_aesu.fa sequence db in gembase format.
        The test checks that the returncode of macsyfinder is 0, then compares
        each element of the first system of results.macsyfinder.json with the
        expected one.

        :raise RuntimeError: if no macsyfinder executable can be located.
        """
        self.out_dir = os.path.join(self.tmp_dir, 'macsyfinder_test_basic_run')
        os.makedirs(self.out_dir)
        macsy_bin = os.path.join(self.macsy_home, 'bin', 'macsyfinder') if self.local_install else which('macsyfinder')
        # check the resolved executable: the builtin `bin` is always truthy,
        # so testing it could never detect a missing macsyfinder
        if not macsy_bin:
            raise RuntimeError('macsyfinder not found, macsyfinder must be either in your path or MACSY_HOME must be defined')

        command = "{bin} --def={def_dir} --profile-dir={profiles} --out-dir={out_dir} --sequence-db={seq_db} --db-type=gembase {systems}".format(
                    bin=macsy_bin,
                    out_dir=self.out_dir,
                    def_dir=os.path.join(self._data_dir, 'data_set_1', 'def'),
                    profiles=os.path.join(self._data_dir, 'data_set_1', 'profiles'),
                    seq_db=os.path.join(self._data_dir, 'base', 'test_aesu.fa'),
                    systems="T9SS T3SS T4SS_typeI",
                    )
        # stdout and stderr are redirected to the null device, they are not wanted on screen.
        # They cannot be redirected in the output directory as --out-dir expects a non existing
        # directory or an empty one, but Popen needs a file as argument of stdout/stderr.

        # The command is prepended by setsid because macsyfinder uses killpg with the group id to
        # terminate all threads and subprocesses when an error occurred in one hmmsearch. It works
        # fine, but when macsyfinder is launched by the tests.py script the kill group also kills
        # the tests.py script, so macsyfinder must run in a new process group.
        with open(os.devnull, 'w') as dev_null:
            try:
                macsy_process = Popen("setsid " + command,
                                      shell=True,
                                      stdin=None,
                                      stdout=dev_null,
                                      stderr=dev_null,
                                      close_fds=False
                                      )
            except Exception as err:
                msg = "macsyfinder execution failed: command = {0} : {1}".format(command, err)
                # a single-argument print() call behaves the same on Python 2 and 3
                print("\n" + msg)
                # bare raise keeps the original traceback
                raise
            # wait inside the 'with' block so the null device stays open while the process runs
            macsy_process.wait()

        self.assertEqual(macsy_process.returncode, 0,
                         "macsyfinder finished with non zero exit code: {0} command launched=\n{1}".format(
                          macsy_process.returncode,
                          command))

        expected_result_path = os.path.join(self._data_dir, 'data_set_1', 'basic_run_results',
                                            'results.macsyfinder.json')
        with open(expected_result_path) as expected_result_file:
            expected_result_json = json.load(expected_result_file)

        test_result_path = os.path.join(self.out_dir, 'results.macsyfinder.json')
        with open(test_result_path) as test_result_file:
            test_result_json = json.load(test_result_file)

        # it should have only one occurrence of T9SS
        self.assertEqual(len(test_result_json), 1,
                         "different type of systems expected: 1  retrieved: {0}".format(len(test_result_json)))
        expected = expected_result_json[0]
        retrieved = test_result_json[0]
        self.assertEqual(expected['name'],
                         retrieved['name'],
                         "type of system name expected: {0}   retrieved: {1}".format(expected['name'],
                                                                                     retrieved['name']))
        self.assertEqual(expected['occurrence_number'],
                         retrieved['occurrence_number'],
                         "occurrence number expected {0}   retrieved: {1}".format(expected['occurrence_number'],
                                                                                  retrieved['occurrence_number']))
        # the failure message reports the replicons themselves (it used to show the occurrence numbers)
        self.assertDictEqual(expected['replicon'],
                             retrieved['replicon'],
                             "replicon expected {0}   retrieved: {1}".format(expected['replicon'],
                                                                             retrieved['replicon']))
        self.assertEqual(expected['id'],
                         retrieved['id'],
                         "system occurrence id expected {0}   retrieved: {1}".format(expected['id'],
                                                                                     retrieved['id']))
        # same comparison for each gene category of the summary
        for category in ('mandatory', 'accessory', 'forbidden'):
            self.assertDictEqual(expected['summary'][category],
                                 retrieved['summary'][category],
                                 "{0} genes expected {1}   retrieved: {2}".format(category,
                                                                                  expected['summary'][category],
                                                                                  retrieved['summary'][category]))
        self.assertListEqual(expected['genes'], retrieved['genes'],
                             "genes expected {0}   retrieved: {1}".format(expected['genes'],
                                                                          retrieved['genes']))
示例#6
0
class Test(MacsyTest):

    def setUp(self):
        """
        Reset the logger registry, attach a null-device handler to the parent
        of the ``macsypy.gene`` logger, then build the test configuration and
        the profile registry.
        """
        logging.getLogger().manager.loggerDict.clear()

        # add only one handler to the macsypy logger
        from macsypy.gene import _log
        if platform.system() == 'Windows':
            log_file = 'NUL'
        else:
            log_file = '/dev/null'
        _log.parent.addHandler(logging.FileHandler(log_file))

        data_dir = self._data_dir
        self.cfg = Config(hmmer_exe="hmmsearch",
                          sequence_db=os.path.join(data_dir, "base", "test_base.fa"),
                          db_type="gembase",
                          e_value_res=1,
                          i_evalue_sel=0.5,
                          def_dir=os.path.join(data_dir, 'DEF'),
                          res_search_dir=tempfile.gettempdir(),
                          res_search_suffix=".search_hmm.out",
                          profile_dir=os.path.join(data_dir, 'profiles'),
                          profile_suffix=".hmm",
                          res_extract_suffix="",
                          log_level=30,
                          log_file=log_file
                          )
        self.profile_registry = ProfilesRegistry(self.cfg)


    def tearDown(self):
        """
        Shut the logging system down, reset the logger registry and remove
        the working directory (best effort).
        """
        # close loggers filehandles, so they don't block file deletion
        # in shutil.rmtree calls in Windows
        logging.shutdown()
        root_logger = logging.getLogger()
        root_logger.manager.loggerDict.clear()
        try:
            shutil.rmtree(self.cfg.working_dir)
        except OSError:
            # cleanup stays best effort: a directory which is missing or
            # cannot be removed must not fail the test, but unrelated
            # exceptions (e.g. KeyboardInterrupt) are no longer hidden
            pass


    def test_len(self):
        """``len()`` of the profile built for the gene "abc" must be 501."""
        gene_name = "abc"
        t2ss = System(self.cfg, "T2SS", 10)
        abc_gene = Gene(self.cfg, gene_name, t2ss, self.profile_registry)
        profile_path = self.profile_registry.get(gene_name)
        abc_profile = Profile(abc_gene, self.cfg, profile_path)
        self.assertEqual(len(abc_profile), 501)


    def test_str(self):
        """``str()`` of a profile must be "<gene name> : <profile path>"."""
        gene_name = "abc"
        t2ss = System(self.cfg, "T2SS", 10)
        abc_gene = Gene(self.cfg, gene_name, t2ss, self.profile_registry)
        profile_path = self.profile_registry.get(gene_name)
        abc_profile = Profile(abc_gene, self.cfg, profile_path)
        expected_repr = "{0} : {1}".format(abc_gene.name, profile_path)
        self.assertEqual(str(abc_profile), expected_repr)

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute(self):
        """
        Execute the profile of the gene "abc" and check the raw output file:
        its first line must be the hmmsearch banner and its 6th line must
        contain the path of the profile which has been used.
        """
        system = System(self.cfg, "T2SS", 10)
        gene = Gene(self.cfg, "abc", system, self.profile_registry)
        path = self.profile_registry.get("abc")
        profile = Profile(gene, self.cfg, path)
        # only the produced output file is inspected, the returned report is not used
        profile.execute()
        with open(profile.hmm_raw_output, 'r') as hmmer_raw_out_file:
            first_line = hmmer_raw_out_file.readline()
            # a hmmsearch output file has been produced
            self.assertTrue(first_line.startswith("# hmmsearch :: search profile(s) against a sequence database"))
            # skip the 4 following lines and keep the 5th one
            for _ in range(5):
                line = hmmer_raw_out_file.readline()
            # hmmsearch used the abc profile: this line should begin with "# query HMM file:"
            expected_path = os.path.join(self.cfg.profile_dir, gene.name + self.cfg.profile_suffix)
            # assertIn shows both the path and the line when the check fails
            self.assertIn(expected_path, line)

    def test_execute_unknown_binary(self):
        """With 'hmmer_exe' set to the name "Nimportnaoik", ``execute`` must raise a RuntimeError."""
        self.cfg.options['hmmer_exe'] = "Nimportnaoik"
        gene_name = "abc"
        t2ss = System(self.cfg, "T2SS", 10)
        abc_gene = Gene(self.cfg, gene_name, t2ss, self.profile_registry)
        profile_path = self.profile_registry.get(gene_name)
        abc_profile = Profile(abc_gene, self.cfg, profile_path)
        self.assertRaises(RuntimeError, abc_profile.execute)