Пример #1
0
 def test_contains(self):
     """Check ``in`` / ``not in`` on gene_bank for an added and a non-added gene."""
     foo_system = System(self.cfg, "foo", 10)
     registered = Gene(self.cfg, 'sctJ_FLG', foo_system,
                       self.profile_registry)
     gene_bank.add_gene(registered)
     self.assertIn(registered, gene_bank)
     # This gene is built but never added to the bank.
     unregistered = Gene(self.cfg, 'abc', foo_system,
                         self.profile_registry)
     self.assertNotIn(unregistered, gene_bank)
 def test_is_aligned(self):
     """Check is_aligned() without and with the ``aligned=True`` argument."""
     t2ss = System(self.cfg, "T2SS", 10)
     reference = Gene(self.cfg, 'sctJ_FLG', t2ss, self.profile_registry)
     candidate = Gene(self.cfg, 'sctJ', t2ss, self.profile_registry)

     default_homolog = Homolog(candidate, reference)
     self.assertFalse(default_homolog.is_aligned())

     aligned_homolog = Homolog(candidate, reference, aligned=True)
     self.assertTrue(aligned_homolog.is_aligned())
Пример #3
0
 def test_add_homolog(self):
     """Check that add_homolog() stores the homolog in ``gene.homologs``."""
     foo_system = System(self.cfg, "foo", 10)
     bar_system = System(self.cfg, "bar", 10)
     target = Gene(self.cfg, 'sctJ_FLG', foo_system, self.profile_registry)
     reference = Gene(self.cfg, 'sctJ', bar_system, self.profile_registry)
     # NOTE(review): Homolog receives cfg as first argument here, unlike
     # other tests in this collection -- confirm against the Homolog
     # signature of the targeted revision.
     added = Homolog(self.cfg, target, reference)
     target.add_homolog(added)
     stored = target.homologs
     self.assertEqual(len(stored), 1)
     self.assertEqual(stored[0], added)
Пример #4
0
 def test_iter(self):
     """Check that iterating gene_bank yields each added gene, and as many items as were added."""
     foo_system = System(self.cfg, "foo", 10)
     genes = [
         Gene(self.cfg, gene_name, foo_system, self.profile_registry)
         for gene_name in ('sctJ_FLG', 'abc')
     ]
     for added in genes:
         gene_bank.add_gene(added)
     # ``seen`` stays 0 when the bank yields nothing.
     seen = 0
     for seen, member in enumerate(gene_bank, 1):
         self.assertIn(member, genes)
     self.assertEqual(seen, len(genes))
Пример #5
0
 def test_get_homologs(self):
     """Check that get_homologs() returns the homologs in insertion order."""
     foo_system = System(self.cfg, "foo", 10)
     bar_system = System(self.cfg, "bar", 10)
     reference = Gene(self.cfg, 'sctN', foo_system, self.profile_registry)
     first_gene = Gene(self.cfg, 'sctJ_FLG', foo_system,
                       self.profile_registry)
     second_gene = Gene(self.cfg, 'sctJ', bar_system, self.profile_registry)

     first_homolog = Homolog(first_gene, reference)
     reference.add_homolog(first_homolog)
     second_homolog = Homolog(second_gene, reference)
     reference.add_homolog(second_homolog)

     expected = [first_homolog, second_homolog]
     self.assertEqual(reference.get_homologs(), expected)
Пример #6
0
 def test_loner(self):
     """
     Check the ``loner`` property: falsy by default, truthy when
     ``loner=True`` is given to the constructor.
     """
     foo_system = System(self.cfg, "foo", 10)

     default_gene = Gene(self.cfg, 'sctJ_FLG', foo_system,
                         self.profile_registry)
     self.assertFalse(default_gene.loner)

     loner_gene = Gene(self.cfg, 'sctJ', foo_system, self.profile_registry,
                       loner=True)
     self.assertTrue(loner_gene.loner)
Пример #7
0
 def test_multi_system(self):
     """
     Check the ``multi_system`` property: falsy by default, truthy when
     ``multi_system=True`` is given to the constructor.
     """
     foo_system = System(self.cfg, "foo", 10)

     default_gene = Gene(self.cfg, 'sctJ_FLG', foo_system,
                         self.profile_registry)
     self.assertFalse(default_gene.multi_system)

     shared_gene = Gene(self.cfg, 'sctJ', foo_system, self.profile_registry,
                        multi_system=True)
     self.assertTrue(shared_gene.multi_system)
Пример #8
0
 def test_exchangeable(self):
     """
     Check the ``exchangeable`` property: falsy by default, truthy when
     ``exchangeable=True`` is given to the constructor.
     """
     foo_system = System(self.cfg, "foo", 10)

     default_gene = Gene(self.cfg, 'sctJ_FLG', foo_system,
                         self.profile_registry)
     self.assertFalse(default_gene.exchangeable)

     swappable_gene = Gene(self.cfg, 'sctJ', foo_system,
                           self.profile_registry, exchangeable=True)
     self.assertTrue(swappable_gene.exchangeable)
Пример #9
0
    def test_str(self):
        """
        """
        system_foo = System(self.cfg, "foo", 10)
        gene = Gene(self.cfg, 'sctJ_FLG', system_foo, self.profile_registry)
        system_bar = System(self.cfg, "bar", 20)
        gene_homolog = Gene(self.cfg, 'sctJ', system_bar,
                            self.profile_registry)
        homolog = Homolog(gene_homolog, gene, self.cfg)
        gene.add_homolog(homolog)
        s = """name : sctJ_FLG
inter_gene_max_space: 10
    homologs: sctJ"""
        self.assertEqual(str(gene), s)
Пример #10
0
 def test_inter_gene_max_space(self):
     """
     Check the ``inter_gene_max_space`` property: it equals the system
     value by default and the gene value when one is passed explicitly.
     """
     system_spacing = 40
     gene_spacing = 50
     foo_system = System(self.cfg, "foo", system_spacing)

     inheriting = Gene(self.cfg, 'sctJ_FLG', foo_system,
                       self.profile_registry)
     self.assertEqual(inheriting.inter_gene_max_space, system_spacing)

     overriding = Gene(self.cfg, 'sctJ', foo_system, self.profile_registry,
                       inter_gene_max_space=gene_spacing)
     self.assertEqual(overriding.inter_gene_max_space, gene_spacing)
Пример #11
0
 def test_execute_unknown_binary(self):
     """Check that execute() raises RuntimeError when ``hmmer_exe`` is set to a bogus name."""
     self.cfg.options['hmmer_exe'] = "Nimportnaoik"
     t2ss = System(self.cfg, "T2SS", 10)
     abc_gene = Gene(self.cfg, "abc", t2ss, self.profile_registry)
     profile_path = self.profile_registry.get("abc")
     abc_profile = Profile(abc_gene, self.cfg, profile_path)
     with self.assertRaises(RuntimeError):
         abc_profile.execute()
 def test_add_forbidden_gene(self):
     """Check that add_forbidden_gene() fills only the forbidden gene list."""
     foo_system = System(self.cfg, "foo", 10)
     forbidden = Gene(self.cfg, 'sctJ_FLG', foo_system,
                      self.profile_registry)
     foo_system.add_forbidden_gene(forbidden)
     self.assertEqual(foo_system._forbidden_genes, [forbidden])
     # The other two categories stay empty.
     self.assertEqual(foo_system._accessory_genes, [])
     self.assertEqual(foo_system._mandatory_genes, [])
Пример #13
0
 def test_GembaseHMMReport_extract(self):
     """Extract hits from the stored gspD hmm output and compare them with the expected hits."""
     system = System(self.cfg, "T2SS", 10)
     gene_name = "gspD"
     gene = Gene(self.cfg, gene_name, system, self.profile_registry)
     # Copy the stored hmm result into the working directory.
     stored_output = os.path.join(self._data_dir, "hmm",
                                  gene_name + self.cfg.res_search_suffix)
     shutil.copy(stored_output, self.cfg.working_dir)
     report_path = os.path.join(self.cfg.working_dir,
                                gene_name + self.cfg.res_search_suffix)
     report = GembaseHMMReport(gene, report_path, self.cfg)
     report.extract()
     self.assertEqual(len(report.hits), 6)
     # hit_id, hit_seq_length, replicon_name, pos_hit, i_eval, score,
     # profile_coverage, begin_match, end_match
     expected_rows = [
         ("NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141, 2e-236, 779.2, 1.0, 104, 741),
         ("PSAE001c01_006940", 803, "PSAE001c01", 68, 1.2e-234, 779.2, 1.0, 104, 741),
         ("PSAE001c01_013980", 759, "PSAE001c01", 69, 3.7e-76, 255.8, 1.0, 105, 736),
         ("PSAE001c01_017350", 600, "PSAE001c01", 70, 3.2e-27, 94.2, 0.5, 226, 506),
         ("PSAE001c01_018920", 776, "PSAE001c01", 71, 6.1e-183, 608.4, 1.0, 48, 606),
         ("PSAE001c01_031420", 658, "PSAE001c01", 73, 1.8e-210, 699.3, 1.0, 55, 614),
     ]
     # The sequence coverage argument is (end - begin + 1) / hit_seq_length,
     # computed in floating point.
     hits = [
         Hit(gene, system, hit_id, seq_len, replicon, position,
             i_eval, score, profile_cov,
             (float(end) - float(begin) + 1) / seq_len, begin, end)
         for (hit_id, seq_len, replicon, position, i_eval, score,
              profile_cov, begin, end) in expected_rows
     ]
     self.assertListEqual(hits, report.hits)
Пример #14
0
 def test_str(self):
     """Check that str() of a Profile is "<gene name> : <profile path>"."""
     t2ss = System(self.cfg, "T2SS", 10)
     abc_gene = Gene(self.cfg, "abc", t2ss, self.profile_registry)
     profile_path = self.profile_registry.get("abc")
     abc_profile = Profile(abc_gene, self.cfg, profile_path)
     expected = "{0} : {1}".format(abc_gene.name, profile_path)
     self.assertEqual(str(abc_profile), expected)
Пример #15
0
    def test_str(self):
        """Check that str() of a Hit is the tab-separated, newline-terminated record built below."""
        system = System(self.cfg, "T2SS", 10)
        gene_name = "gspD"
        # gene_name was previously assigned but unused, with the literal
        # repeated in the Gene() call.
        gene = Gene(self.cfg, gene_name, system, self.profile_registry)
        hit_prop = {
            'id': "PSAE001c01_006940",
            'hit_seq_len': 803,
            'replicon_name': "PSAE001c01",
            'position': 694,
            'i_eval': float(1.2e-234),
            'score': float(779.2),
            'gene_name': gene.name,
            'system_name': system.name,
            'profil_coverage': float(1.0),
            'sequence_coverage': float(638.000000),
            'begin': 104,
            'end': 741
        }

        hit = Hit(gene, system, hit_prop['id'], hit_prop['hit_seq_len'],
                  hit_prop['replicon_name'], hit_prop['position'],
                  hit_prop['i_eval'], hit_prop['score'],
                  hit_prop['profil_coverage'], hit_prop['sequence_coverage'],
                  hit_prop['begin'], hit_prop['end'])
        # The expected text is formatted from the same property dict that
        # was used to build the hit.
        s = "%(id)s\t%(replicon_name)s\t%(position)d\t%(hit_seq_len)d\t%(gene_name)s\t%(system_name)s\t%(i_eval)s\t%(score)s\t%(profil_coverage)f\t%(sequence_coverage)f\t%(begin)d\t%(end)d\n" % hit_prop
        self.assertEqual(s, str(hit))
Пример #16
0
 def test_get_uniq_object(self):
     """Check that two get_profile() calls with the same arguments return equal profiles."""
     foo_system = System(self.cfg, "foo", 10)
     sctj_flg = Gene(self.cfg, 'sctJ_FLG', foo_system, self.profile_registry)
     profile_path = self.profile_registry.get('sctJ_FLG')
     first = profile_factory.get_profile(sctj_flg, self.cfg, profile_path)
     second = profile_factory.get_profile(sctj_flg, self.cfg, profile_path)
     self.assertEqual(first, second)
 def test_get_profile(self):
     """Check that get_profile() returns a Profile whose gene has the requested name."""
     system_foo = System(self.cfg, "foo", 10)
     gene_name = 'sctJ_FLG'
     gene = Gene(self.cfg, gene_name, system_foo, self.profile_registry)
     # NOTE(review): the registry itself is passed as third argument here,
     # whereas test_get_uniq_object passes a path -- confirm against the
     # get_profile() signature of the targeted revision.
     profile = profile_factory.get_profile(gene, self.cfg,
                                           self.profile_registry)
     # assertIsInstance reports the offending object on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(profile, Profile)
     self.assertEqual(profile.gene.name, gene_name)
Пример #18
0
 def test_system(self):
     """
     Check that the ``system`` property equals the system given to the
     constructor.
     """
     owner = System(self.cfg, "foo", 10)
     sctj_flg = Gene(self.cfg, 'sctJ_FLG', owner, self.profile_registry)
     self.assertEqual(sctj_flg.system, owner)
Пример #19
0
    def test_str(self):
        """Check that str() of a Hit is the tab-separated, newline-terminated record built below."""
        system = System(self.cfg, "T2SS", 10)
        gene_name = "gspD"
        # gene_name was previously assigned but unused, with the literal
        # repeated in the Gene() call.
        gene = Gene(self.cfg, gene_name, system, self.profile_registry)
        hit_prop = {
            'id': "PSAE001c01_006940",
            'hit_seq_len': 803,
            'replicon_name': "PSAE001c01",
            'position': 694,
            'i_eval': float(1.2e-234),
            'score': float(779.2),
            'gene_name': gene.name,
            'system_name': system.name,
            'profil_coverage': float(1.0),
            'sequence_coverage': float(638.000000),
            'begin': 104,
            'end': 741
        }

        hit = Hit(gene, system, hit_prop['id'], hit_prop['hit_seq_len'],
                  hit_prop['replicon_name'], hit_prop['position'],
                  hit_prop['i_eval'], hit_prop['score'],
                  hit_prop['profil_coverage'], hit_prop['sequence_coverage'],
                  hit_prop['begin'], hit_prop['end'])
        # The expected text is formatted from the same property dict that
        # was used to build the hit.
        s = "{id}\t{replicon_name}\t{position:d}\t{hit_seq_len:d}\t{gene_name}\t{system_name}\t{i_eval:.3e}\t{score:.3f}\t{profil_coverage:.3f}\t{sequence_coverage:.3f}\t{begin:d}\t{end:d}\n".format(
            **hit_prop)
        self.assertEqual(s, str(hit))
Пример #20
0
 def test_get_uniq_object(self):
     """Check that two lookups of the same name in gene_bank give the very same object."""
     foo_system = System(self.cfg, "foo", 10)
     stored = Gene(self.cfg, 'sctJ_FLG', foo_system, self.profile_registry)
     gene_bank.add_gene(stored)
     first_lookup = gene_bank['sctJ_FLG']
     second_lookup = gene_bank['sctJ_FLG']
     # Equality first, then identity.
     self.assertEqual(first_lookup, second_lookup)
     self.assertIs(first_lookup, second_lookup)
Пример #21
0
 def test_add_get_gene(self):
     """Check that a lookup raises KeyError before add_gene() and returns the gene afterwards."""
     gene_name = 'sctJ_FLG'
     self.assertRaises(KeyError, gene_bank.__getitem__, gene_name)
     # NOTE(review): the System arguments are (name, cfg, ...) here while
     # other tests use (cfg, name, ...) -- confirm against the System
     # signature of the targeted revision.
     system_foo = System("foo", self.cfg, 10)
     gene = Gene(self.cfg, gene_name, system_foo, self.profile_registry)
     gene_bank.add_gene(gene)
     gene_from_bank = gene_bank[gene_name]
     # assertIsInstance reports the offending object on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(gene_from_bank, Gene)
     self.assertEqual(gene_from_bank, gene)
Пример #22
0
 def test_HMMReport(self):
     """Check that calling HMMReport(gene, path, cfg) raises TypeError."""
     t2ss = System(self.cfg, "T2SS", 10)
     gene_name = "gspD"
     gspd = Gene(self.cfg, gene_name, t2ss, self.profile_registry)
     # Copy the stored hmm result into the working directory.
     stored_output = os.path.join(self._data_dir, "hmm",
                                  gene_name + self.cfg.res_search_suffix)
     shutil.copy(stored_output, self.cfg.working_dir)
     report_path = os.path.join(self.cfg.working_dir,
                                gene_name + self.cfg.res_search_suffix)
     with self.assertRaises(TypeError):
         HMMReport(gspd, report_path, self.cfg)
Пример #23
0
    def test_GembaseHMMReport_extract_concurent(self):
        """
        Run extract() on five reports of the same hmm output, one thread
        per report, then check that every report holds the expected hits.
        """
        system = System(self.cfg, "T2SS", 10)
        gene_name = "gspD"
        gene = Gene(self.cfg, gene_name, system, self.profile_registry)
        shutil.copy(
            os.path.join(self._data_dir, "hmm",
                         gene_name + self.cfg.res_search_suffix),
            self.cfg.working_dir)
        report_path = os.path.join(self.cfg.working_dir,
                                   gene_name + self.cfg.res_search_suffix)
        reports = [GembaseHMMReport(gene, report_path, self.cfg)
                   for _ in range(5)]

        import threading

        def worker(report):
            report.extract()

        # Keep a handle on every worker so each one can be joined.
        # Previously join() sat outside the enumeration loop, so only the
        # last enumerated thread was waited for (possibly the main thread,
        # which cannot be joined) and the assertions could run while
        # extractions were still in progress.
        workers = []
        for report in reports:
            t = threading.Thread(target=worker, args=(report, ))
            t.start()
            workers.append(t)
        for t in workers:
            t.join()

        #gene, system,     hit_id,        hit_seq_length replicon_name, pos_hit, i_eval,          score,       profile_coverage, sequence_coverage, begin_match, end_match
        hits = [
            Hit(gene, system, "NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141,
                float(2e-236), float(779.2), float(1.000000),
                (741.0 - 104.0 + 1) / 803, 104, 741),
            Hit(gene, system, "PSAE001c01_006940", 803, "PSAE001c01", 68,
                float(1.2e-234), float(779.2), float(1.000000),
                (741.0 - 104.0 + 1) / 803, 104, 741),
            Hit(gene, system, "PSAE001c01_013980", 759, "PSAE001c01", 69,
                float(3.7e-76), float(255.8), float(1.000000),
                (736.0 - 105.0 + 1) / 759, 105, 736),
            Hit(gene, system, "PSAE001c01_017350", 600, "PSAE001c01", 70,
                float(3.2e-27), float(94.2), float(0.500000),
                (506.0 - 226.0 + 1) / 600, 226, 506),
            Hit(gene, system, "PSAE001c01_018920", 776, "PSAE001c01", 71,
                float(6.1e-183), float(608.4), float(1.000000),
                (606.0 - 48.0 + 1) / 776, 48, 606),
            Hit(gene, system, "PSAE001c01_031420", 658, "PSAE001c01", 73,
                float(1.8e-210), float(699.3), float(1.000000),
                (614.0 - 55.0 + 1) / 658, 55, 614)
        ]
        for report in reports:
            report.save_extract()
            self.assertEqual(len(report.hits), len(hits))
            self.assertListEqual(report.hits, hits)
    def test_get_gene_ref(self):
        """
        Check get_gene_ref(): it returns the reference gene for a homolog,
        None for the reference gene itself, and raises KeyError for a gene
        that was never added to the system.
        """
        foo_system = System(self.cfg, "foo", 10)
        reference = Gene(self.cfg, 'sctJ_FLG', foo_system,
                         self.profile_registry)
        homolog_gene = Gene(self.cfg, 'sctJ', foo_system,
                            self.profile_registry)
        homolog = Homolog(homolog_gene, reference)
        reference.add_homolog(homolog)

        adders = (foo_system.add_forbidden_gene,
                  foo_system.add_accessory_gene,
                  foo_system.add_mandatory_gene)
        for add_gene in adders:
            # Empty the three gene lists so each category is tried alone.
            foo_system._mandatory_genes = []
            foo_system._accessory_genes = []
            foo_system._forbidden_genes = []
            add_gene(reference)
            self.assertEqual(reference, foo_system.get_gene_ref(homolog))
        self.assertIsNone(foo_system.get_gene_ref(reference))

        unknown = Gene(self.cfg, 'abc', foo_system, self.profile_registry)
        with self.assertRaises(KeyError):
            foo_system.get_gene_ref(unknown)
Пример #25
0
 def test_cmp(self):
     """Check the ordering of two hits: h1 compares greater than h0 and h0 less than h1."""
     system = System(self.cfg, "T2SS", 10)
     gene_name = "gspD"
     # gene_name was previously assigned but unused, with the literal
     # repeated in the Gene() call.
     gene = Gene(self.cfg, gene_name, system, self.profile_registry)
     h0 = Hit(gene, system, "PSAE001c01_006940", 803, "PSAE001c01", 3450,
              float(1.2e-234), float(779.2), float(1.000000),
              (741.0 - 104.0 + 1) / 803, 104, 741)
     h1 = Hit(gene, system, "PSAE001c01_013980", 759, "PSAE001c01", 4146,
              float(3.7e-76), float(255.8), float(1.000000),
              (736.0 - 105.0 + 1) / 759, 105, 736)
     self.assertGreater(h1, h0)
     self.assertLess(h0, h1)
    def test_get_gene(self):
        """
        Check get_gene() by name for a gene, then for one of its homologs,
        then for one of its analogs, with the gene added in turn as
        forbidden, accessory and mandatory. An unknown name raises KeyError.
        """
        foo_system = System(self.cfg, "foo", 10)
        gene_name = 'sctJ_FLG'
        base_gene = Gene(self.cfg, gene_name, foo_system,
                         self.profile_registry)

        def check_lookup(expected, lookup_name):
            # Add base_gene to each category in turn, on emptied lists, and
            # check what get_gene() returns for lookup_name.
            for add_gene in (foo_system.add_forbidden_gene,
                             foo_system.add_accessory_gene,
                             foo_system.add_mandatory_gene):
                foo_system._mandatory_genes = []
                foo_system._accessory_genes = []
                foo_system._forbidden_genes = []
                add_gene(base_gene)
                self.assertEqual(expected, foo_system.get_gene(lookup_name))

        check_lookup(base_gene, gene_name)

        with self.assertRaises(KeyError):
            foo_system.get_gene('bar')

        homolog_name = 'sctJ'
        homolog_gene = Gene(self.cfg, homolog_name, foo_system,
                            self.profile_registry)
        homolog = Homolog(homolog_gene, base_gene)
        base_gene.add_homolog(homolog)
        check_lookup(homolog, homolog_name)

        analog_name = 'sctC'
        analog_gene = Gene(self.cfg, analog_name, foo_system,
                           self.profile_registry)
        analog = Analog(analog_gene, base_gene)
        base_gene.add_analog(analog)
        check_lookup(analog, analog_name)
 def test_min_genes_required(self):
     """
     Check ``min_genes_required``: it equals the explicit constructor
     value when given, otherwise the number of mandatory genes.
     """
     system_name = 'foo'
     explicit_minimum = 40
     explicit_system = System(self.cfg, system_name, 10,
                              min_genes_required=explicit_minimum)
     mandatory = Gene(self.cfg, 'sctJ_FLG', explicit_system,
                      self.profile_registry)
     explicit_system.add_mandatory_gene(mandatory)
     self.assertEqual(explicit_system.min_genes_required, explicit_minimum)
     # see https://projets.pasteur.fr/issues/1850
     default_system = System(self.cfg, system_name, 10)
     self.assertEqual(default_system.min_genes_required,
                      len(default_system.mandatory_genes))
Пример #28
0
 def test_execute(self):
     """
     Run the profile and check the raw output file: its first line is
     the hmmsearch banner and its sixth line contains the profile path.
     """
     system = System(self.cfg, "T2SS", 10)
     gene = Gene(self.cfg, "abc", system, self.profile_registry)
     path = self.profile_registry.get("abc")
     profile = Profile(gene, self.cfg, path)
     # The return value is not inspected here; only the raw output is.
     profile.execute()
     hmmer_raw_out = profile.hmm_raw_output
     with open(hmmer_raw_out, 'r') as hmmer_raw_out_file:
         first_line = hmmer_raw_out_file.readline()
         # a hmmsearch output file has been produced
         self.assertTrue(first_line.startswith("# hmmsearch :: search profile(s) against a sequence database"))
         # Read five more lines; the last one read is the line to check.
         for _ in range(5):
             line = hmmer_raw_out_file.readline()
         # hmmsearch used the abc profile: that line is expected to hold
         # the "# query HMM file:" entry -- TODO confirm
         path = os.path.join(self.cfg.profile_dir, gene.name + self.cfg.profile_suffix)
         self.assertIn(path, line)
Пример #29
0
 def test_str(self):
     """Check str() of an extracted report: five '#' header lines followed by one line per hit."""
     system = System(self.cfg, "T2SS", 10)
     gene_name = "gspD"
     gene = Gene(self.cfg, gene_name, system, self.profile_registry)
     # Copy the stored hmm result into the working directory.
     stored_output = os.path.join(self._data_dir, "hmm",
                                  gene_name + self.cfg.res_search_suffix)
     shutil.copy(stored_output, self.cfg.working_dir)
     report_path = os.path.join(self.cfg.working_dir,
                                gene_name + self.cfg.res_search_suffix)
     report = GembaseHMMReport(gene, report_path, self.cfg)
     report.extract()
     # hit_id, hit_seq_length, replicon_name, pos_hit, i_eval, score,
     # profile_coverage, begin_match, end_match
     expected_rows = [
         ("NC_xxxxx_xx_056141", 803, "NC_xxxxx_xx", 141, 2e-236, 779.2, 1.0, 104, 741),
         ("PSAE001c01_006940", 803, "PSAE001c01", 68, 1.2e-234, 779.2, 1.0, 104, 741),
         ("PSAE001c01_013980", 759, "PSAE001c01", 69, 3.7e-76, 255.8, 1.0, 105, 736),
         ("PSAE001c01_017350", 600, "PSAE001c01", 70, 3.2e-27, 94.2, 0.5, 226, 506),
         ("PSAE001c01_018920", 776, "PSAE001c01", 71, 6.1e-183, 608.4, 1.0, 48, 606),
         ("PSAE001c01_031420", 658, "PSAE001c01", 73, 1.8e-210, 699.3, 1.0, 55, 614),
     ]
     # The sequence coverage argument is (end - begin + 1) / hit_seq_length,
     # computed in floating point.
     hits = [
         Hit(gene, system, hit_id, seq_len, replicon, position,
             i_eval, score, profile_cov,
             (float(end) - float(begin) + 1) / seq_len, begin, end)
         for (hit_id, seq_len, replicon, position, i_eval, score,
              profile_cov, begin, end) in expected_rows
     ]
     header = "# gene: {0} extract from {1} hmm output\n".format(
         gene.name, report_path)
     header += "# profile length= {0:d}\n".format(len(gene.profile))
     header += "# i_evalue threshold= {0:.3f}\n".format(self.cfg.i_evalue_sel)
     header += "# coverage threshold= {0:.3f}\n".format(
         self.cfg.coverage_profile)
     header += "# hit_id replicon_name position_hit hit_sequence_length gene_name gene_system i_eval score profile_coverage sequence_coverage begin end\n"
     expected = header + "".join([str(h) for h in hits])
     self.assertEqual(str(report), expected)
 def test_delegation(self):
     """Check that a Homolog's ``system`` attribute equals the system its gene was built with."""
     t2ss = System(self.cfg, "T2SS", 10)
     reference = Gene(self.cfg, 'sctJ_FLG', t2ss, self.profile_registry)
     wrapped = Gene(self.cfg, 'sctJ', t2ss, self.profile_registry)
     homolog = Homolog(wrapped, reference)
     self.assertEqual(homolog.system, t2ss)