Пример #1
0
    def test_fill_ordered_replicon_min_max(self):
        # Check that _fill_ordered_min_max() registers a single replicon,
        # stored under RepliconDB.ordered_replicon_name, spanning positions
        # 1 to 52 of the "ordered_replicon_base" data set.
        import tempfile  # local import: only needed for the portable temp dir

        # NOTE(review): tearDown() is called explicitly before a dedicated
        # configuration is built; presumably it discards what setUp created
        # - confirm against the fixture.
        self.tearDown()
        data_dir = self._data_dir
        self.cfg = Config(hmmer_exe="hmmsearch",
                          sequence_db=os.path.join(data_dir, "base",
                                                   "ordered_replicon_base"),
                          db_type="ordered_replicon",
                          e_value_res=1,
                          i_evalue_sel=0.5,
                          def_dir=os.path.join(data_dir, 'DEF'),
                          # platform temp dir instead of a hard-coded '/tmp'
                          res_search_dir=tempfile.gettempdir(),
                          res_search_suffix=".search_hmm.out",
                          profile_dir=os.path.join(data_dir, 'profiles'),
                          profile_suffix=".hmm",
                          res_extract_suffix="",
                          log_level=30,
                          # platform null device instead of '/dev/null'
                          log_file=os.devnull)

        # Work on a copy of the sequence file located in the working dir.
        shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
        self.cfg.options['sequence_db'] = os.path.join(
            self.cfg.working_dir, os.path.basename(self.cfg.sequence_db))

        idx = Indexes(self.cfg)
        idx._build_my_indexes()
        # fake_init is defined outside this snippet; it replaces the real
        # constructor so that the filling method can be called explicitly.
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(self.cfg)
        db._fill_ordered_min_max(self.cfg.replicon_topology)

        self.assertEqual(len(db._DB), 1)
        rep = db[RepliconDB.ordered_replicon_name]
        self.assertEqual(rep.topology, self.cfg.replicon_topology)
        self.assertEqual(rep.min, 1)
        self.assertEqual(rep.max, 52)
Пример #2
0
 def test_fill_gembase_min_max_with_topology(self):
     # Replicons listed in the topology file (which starts with a comment
     # line) must get the topology written there; 'NC_xxxxx_xx' is not
     # listed and is expected to carry the 'circular' default passed in.
     self.cfg.options['topology_file'] = self.cfg.sequence_db + ".topo"
     with open(self.cfg.topology_file, 'w') as topo_file:
         topo_file.write('# topology file\nESCO030p01 : circular\nPSAE001c01 : linear\n')
     # fake_init is defined outside this snippet; it replaces the real
     # constructor so that the filling methods can be called explicitly.
     RepliconDB.__init__ = self.fake_init
     db = RepliconDB(self.cfg)
     topologies = db._fill_topology()
     db._fill_gembase_min_max(topologies, 'circular')

     self.assertEqual(len(db._DB), 3)
     self.assertEqual(set(db._DB.keys()), {'ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'})
     # (name, topology, min, max, genes) expected for each replicon
     expectations = [
         ('ESCO030p01', 'circular', 1, 67, self.ESCO030p01_genes),
         ('PSAE001c01', 'linear', 68, 133, self.PSAE001c01_genes),
         ('NC_xxxxx_xx', 'circular', 134, 141, self.NCDB_genes),
     ]
     for name, topology, first, last, genes in expectations:
         replicon = db[name]
         self.assertEqual(replicon.topology, topology)
         self.assertEqual(replicon.min, first)
         self.assertEqual(replicon.max, last)
         self.assertEqual(replicon.genes, genes)
Пример #3
0
 def test_items(self):
     # items() must return one (name, RepliconInfo) pair per replicon;
     # assertItemsEqual does not check the order of the pairs.
     db = RepliconDB(self.cfg)
     expected_items = [
         ('ESCO030p01',
          RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)),
         ('NC_xxxxx_xx',
          RepliconInfo("circular", 134, 141, self.NCDB_genes)),
         ('PSAE001c01',
          RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)),
     ]
     self.assertItemsEqual(db.items(), expected_items)
Пример #4
0
    def test_fill_ordered_replicon_min_max(self):
        # Check that _fill_ordered_min_max() registers a single replicon,
        # stored under RepliconDB.ordered_replicon_name, spanning positions
        # 1 to 52 of the "ordered_replicon_base" data set.
        self.tearDown()
        data_dir = self._data_dir
        # Null device of the current platform, used to discard the log.
        null_device = 'NUL' if platform.system() == 'Windows' else '/dev/null'
        settings = dict(
            hmmer_exe="hmmsearch",
            sequence_db=os.path.join(data_dir, "base", "ordered_replicon_base"),
            db_type="ordered_replicon",
            e_value_res=1,
            i_evalue_sel=0.5,
            def_dir=os.path.join(data_dir, 'DEF'),
            res_search_dir=tempfile.gettempdir(),
            res_search_suffix=".search_hmm.out",
            profile_dir=os.path.join(data_dir, 'profiles'),
            profile_suffix=".hmm",
            res_extract_suffix="",
            log_level=30,
            log_file=null_device,
        )
        self.cfg = Config(**settings)

        # Work on a copy of the sequence file located in the working dir.
        shutil.copy(self.cfg.sequence_db, self.cfg.working_dir)
        copied_db = os.path.join(self.cfg.working_dir,
                                 os.path.basename(self.cfg.sequence_db))
        self.cfg.options['sequence_db'] = copied_db

        Indexes(self.cfg)._build_my_indexes()
        # fake_init is defined outside this snippet; it replaces the real
        # constructor so that the filling method can be called explicitly.
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(self.cfg)
        db._fill_ordered_min_max(self.cfg.replicon_topology)

        self.assertEqual(len(db._DB), 1)
        replicon = db[RepliconDB.ordered_replicon_name]
        self.assertEqual(replicon.topology, self.cfg.replicon_topology)
        self.assertEqual(replicon.min, 1)
        self.assertEqual(replicon.max, 52)
Пример #5
0
 def test_get(self):
     # get() must return the stored RepliconInfo for a known replicon,
     # None for an unknown one, or the supplied default when one is given.
     db = RepliconDB(self.cfg)
     known_replicons = [
         ('ESCO030p01',
          RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)),
         ('PSAE001c01',
          RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)),
     ]
     for name, expected_info in known_replicons:
         self.assertEqual(expected_info, db.get(name))
     self.assertIsNone(db.get('foo'))
     self.assertEqual('bar', db.get('foo', 'bar'))
Пример #6
0
 def test_items(self):
     # items() must return exactly one (name, RepliconInfo) pair per
     # replicon; assertItemsEqual does not check the order of the pairs.
     db = RepliconDB(self.cfg)
     ESCO030p01 = RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)
     PSAE001c01 = RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)
     self.assertItemsEqual(db.items(), [('ESCO030p01', ESCO030p01), ('PSAE001c01', PSAE001c01)])
     # The second RepliconDB and the two RepliconInfo objects that used to
     # be built here were never asserted on, so they have been removed.
Пример #7
0
 def test_replicon_infos(self):
     # replicon_infos() must return the RepliconInfo of every replicon;
     # assertCountEqual does not check their order.
     db = RepliconDB(self.cfg)
     expected_infos = [
         RepliconInfo(self.cfg.replicon_topology(), 1, 67,
                      self.ESCO030p01_genes),
         RepliconInfo("circular", 134, 141, self.NCDB_genes),
         RepliconInfo(self.cfg.replicon_topology(), 68, 133,
                      self.PSAE001c01_genes),
     ]
     self.assertCountEqual(db.replicon_infos(), expected_infos)
Пример #8
0
 def test_fill_topology(self):
     # Write a topology file with one "name : topology" line per replicon
     # and check that _fill_topology() reads the same mapping back.
     self.cfg.options['topology_file'] = self.cfg.sequence_db + ".topo"
     expected_topology = {'ESCO030p01': 'circular', 'PSAE001c01': 'linear'}
     with open(self.cfg.topology_file, 'w') as topo_file:
         topo_file.writelines('%s : %s\n' % entry
                              for entry in expected_topology.items())
     # fake_init is defined outside this snippet; it replaces the real
     # constructor so that _fill_topology() can be called explicitly.
     RepliconDB.__init__ = self.fake_init
     parsed_topology = RepliconDB(self.cfg)._fill_topology()
     self.assertDictEqual(expected_topology, parsed_topology)
Пример #9
0
 def test_get(self):
     # Known names return their RepliconInfo; an unknown name returns None
     # unless a default value is supplied, in which case that is returned.
     db = RepliconDB(self.cfg)
     esco_info = RepliconInfo(self.cfg.replicon_topology, 1, 67,
                              self.ESCO030p01_genes)
     psae_info = RepliconInfo(self.cfg.replicon_topology, 68, 133,
                              self.PSAE001c01_genes)
     for expected_info, name in ((esco_info, 'ESCO030p01'),
                                 (psae_info, 'PSAE001c01')):
         self.assertEqual(expected_info, db.get(name))
     missing = db.get('foo')
     self.assertIsNone(missing)
     fallback = db.get('foo', 'bar')
     self.assertEqual('bar', fallback)
Пример #10
0
 def test_iteritems(self):
     # iteritems() must yield the (name, RepliconInfo) pairs in this exact
     # order: ESCO030p01, PSAE001c01, NC_xxxxx_xx. Each position is
     # reported as its own sub-test.
     db = RepliconDB(self.cfg)
     expected_sequence = [
         ('ESCO030p01', RepliconInfo(self.cfg.replicon_topology(), 1, 67,
                                     self.ESCO030p01_genes)),
         ('PSAE001c01', RepliconInfo(self.cfg.replicon_topology(), 68, 133,
                                     self.PSAE001c01_genes)),
         ('NC_xxxxx_xx', RepliconInfo("circular", 134, 141,
                                      self.NCDB_genes)),
     ]
     produced = db.iteritems()
     for expected_item in expected_sequence:
         with self.subTest(item=expected_item):
             self.assertEqual(next(produced), expected_item)
Пример #11
0
    def test_fill_topology(self):
        # Write a topology file with one "name : topology" line per replicon
        # and check that _fill_topology() reads the same mapping back.
        self.args.topology_file = self.args.sequence_db + ".topo"
        expected_topology = {'ESCO030p01': 'circular', 'PSAE001c01': 'linear'}
        with open(self.args.topology_file, 'w') as topo_file:
            topo_file.writelines(
                '{0} : {1}\n'.format(name, topology)
                for name, topology in expected_topology.items())

        # The configuration is built only after topology_file is set.
        cfg = Config(MacsyDefaults(), self.args)
        # fake_init is defined outside this snippet; it replaces the real
        # constructor so that _fill_topology() can be called explicitly.
        RepliconDB.__init__ = self.fake_init
        parsed_topology = RepliconDB(cfg)._fill_topology()
        self.assertDictEqual(expected_topology, parsed_topology)
Пример #12
0
 def test_fill_topology(self):
     # Round trip: the mapping written to the topology file must be the
     # mapping returned by _fill_topology().
     self.cfg.options['topology_file'] = self.cfg.sequence_db + ".topo"
     sent_topology = {'ESCO030p01': 'circular', 'PSAE001c01': 'linear'}
     topo_lines = ['%s : %s\n' % pair for pair in sent_topology.items()]
     with open(self.cfg.topology_file, 'w') as topo_file:
         topo_file.writelines(topo_lines)
     # fake_init is defined outside this snippet; it replaces the real
     # constructor so that _fill_topology() can be called explicitly.
     RepliconDB.__init__ = self.fake_init
     db = RepliconDB(self.cfg)
     received_topology = db._fill_topology()
     self.assertDictEqual(sent_topology, received_topology)
Пример #13
0
 def test_items(self):
     # items() must return exactly one (name, RepliconInfo) pair per
     # replicon; assertItemsEqual does not check the order of the pairs.
     db = RepliconDB(self.cfg)
     ESCO030p01 = RepliconInfo(self.cfg.replicon_topology, 1, 67,
                               self.ESCO030p01_genes)
     PSAE001c01 = RepliconInfo(self.cfg.replicon_topology, 68, 133,
                               self.PSAE001c01_genes)
     self.assertItemsEqual(db.items(), [('ESCO030p01', ESCO030p01),
                                        ('PSAE001c01', PSAE001c01)])
     # The second RepliconDB and the two RepliconInfo objects that used to
     # be built here were never asserted on, so they have been removed.
Пример #14
0
 def test_fill_gembase_min_max_default_topology(self):
     # With an empty topology mapping both replicons are expected to carry
     # the topology passed as default (asserted to be 'circular' here).
     # fake_init is defined outside this snippet; it replaces the real
     # constructor so that the filling method can be called explicitly.
     RepliconDB.__init__ = self.fake_init
     db = RepliconDB(self.cfg)
     db._fill_gembase_min_max({}, self.cfg.replicon_topology)
     self.assertEqual(len(db._DB), 2)
     # (name, min, max) expected for each replicon
     for name, first, last in (('ESCO030p01', 1, 67),
                               ('PSAE001c01', 68, 133)):
         replicon = db[name]
         self.assertEqual(replicon.topology, 'circular')
         self.assertEqual(replicon.min, first)
         self.assertEqual(replicon.max, last)
Пример #15
0
 def test_fill_gembase_min_max_default_topology(self):
     # No topology file information is given (empty dict), so the two
     # replicons must both be 'circular' with the boundaries listed below.
     RepliconDB.__init__ = self.fake_init
     db = RepliconDB(self.cfg)
     db._fill_gembase_min_max({}, self.cfg.replicon_topology)
     self.assertEqual(len(db._DB), 2)

     expected_bounds = [('ESCO030p01', (1, 67)), ('PSAE001c01', (68, 133))]
     for replicon_name, (lowest, highest) in expected_bounds:
         info = db[replicon_name]
         self.assertEqual(info.topology, 'circular')
         self.assertEqual(info.min, lowest)
         self.assertEqual(info.max, highest)
Пример #16
0
 def test_fill_gembase_min_max_with_topology(self):
     # The topology written in the topology file must end up in the
     # RepliconInfo of each listed replicon.
     self.cfg.options['topology_file'] = self.cfg.sequence_db + ".topo"
     with open(self.cfg.topology_file, 'w') as topo_file:
         topo_file.write('ESCO030p01 : circular\nPSAE001c01 : linear\n')
     # fake_init is defined outside this snippet; it replaces the real
     # constructor so that the filling methods can be called explicitly.
     RepliconDB.__init__ = self.fake_init
     db = RepliconDB(self.cfg)
     topologies = db._fill_topology()
     db._fill_gembase_min_max(topologies, 'circular')
     self.assertEqual(len(db._DB), 2)
     # (name, topology, min, max) expected for each replicon
     expectations = (
         ('ESCO030p01', 'circular', 1, 67),
         ('PSAE001c01', 'linear', 68, 133),
     )
     for name, topology, first, last in expectations:
         replicon = db[name]
         self.assertEqual(replicon.topology, topology)
         self.assertEqual(replicon.min, first)
         self.assertEqual(replicon.max, last)
Пример #17
0
 def test_fill_gembase_min_max_with_topology(self):
     # ESCO030p01 is declared circular and PSAE001c01 linear in the
     # topology file; both declarations must be found in the filled DB.
     self.cfg.options['topology_file'] = self.cfg.sequence_db + ".topo"
     with open(self.cfg.topology_file, 'w') as handle:
         handle.write('ESCO030p01 : circular\nPSAE001c01 : linear\n')
     RepliconDB.__init__ = self.fake_init
     db = RepliconDB(self.cfg)
     db._fill_gembase_min_max(db._fill_topology(), 'circular')
     self.assertEqual(len(db._DB), 2)

     expected_by_name = [
         ('ESCO030p01', {'topology': 'circular', 'min': 1, 'max': 67}),
         ('PSAE001c01', {'topology': 'linear', 'min': 68, 'max': 133}),
     ]
     for replicon_name, expected in expected_by_name:
         info = db[replicon_name]
         self.assertEqual(info.topology, expected['topology'])
         self.assertEqual(info.min, expected['min'])
         self.assertEqual(info.max, expected['max'])
Пример #18
0
def _search_in_ordered_replicon(hits_by_replicon, models_to_detect, config,
                                logger):
    """
    Search systems replicon by replicon and model by model.

    For every replicon of *hits_by_replicon* and every model of
    *models_to_detect*, the hits are filtered by the model, grouped into
    clusters, and each combination of clusters is submitted to an
    ``OrderedMatchMaker``.

    :param hits_by_replicon: mapping of replicon name to the hits found on
                             this replicon.
    :param models_to_detect: the models to look for. It is iterated once per
                             replicon, so it must be re-iterable.
    :param config: the configuration. It is used to build the ``RepliconDB``
                   and provides ``hit_weights()`` and
                   ``redundancy_penalty()``.
    :param logger: the logger receiving the info and debug messages.
    :return: a 2-tuple ``(systems, rejected_clusters)``. *systems* holds the
             match results that are ``System`` instances, sorted by replicon
             name, first position, model fqn and decreasing score;
             *rejected_clusters* holds every other match result, in the
             order they were produced.
    """
    systems = []
    rejected_clusters = []
    rep_db = RepliconDB(config)
    # Both values depend only on the configuration: compute them once
    # instead of once per (replicon, model) pair / per cluster combination.
    hit_weights = HitWeight(**config.hit_weights())
    redundancy_penalty = config.redundancy_penalty()

    for rep_name in hits_by_replicon:
        logger.info(
            "\n{:#^60}".format(f" Hits analysis for replicon {rep_name} "))
        rep_info = rep_db[rep_name]
        for model in models_to_detect:
            logger.info(f"Check model {model.fqn}")
            hits_related_one_model = model.filter(hits_by_replicon[rep_name])
            logger.debug("{:#^80}".format(" hits related to {} ".format(
                model.name)))
            logger.debug("".join([str(h) for h in hits_related_one_model]))
            logger.debug("#" * 80)
            logger.info("Building clusters")
            clusters = cluster.build_clusters(hits_related_one_model, rep_info,
                                              model, hit_weights)
            logger.debug("{:#^80}".format("CLUSTERS"))
            logger.debug("\n" + "\n".join([str(c) for c in clusters]))
            logger.debug("#" * 80)
            logger.info("Searching systems")
            if model.multi_loci:
                # The loners are already in the clusters list with their
                # context, so they are taken into account: try every
                # combination of 1 .. len(clusters) clusters.
                clusters_combination = [
                    itertools.combinations(clusters, i)
                    for i in range(1, len(clusters) + 1)
                ]
            else:
                # The loners must be added manually, but only those kept by
                # filter_loners for the cluster at hand (i.e. the loners the
                # cluster does not already contain).
                loners = cluster.get_loners(hits_related_one_model, model,
                                            hit_weights)
                clusters_combination = []
                for one_cluster in clusters:
                    one_clust_combination = [one_cluster]
                    one_clust_combination.extend(
                        cluster.filter_loners(one_cluster, loners))
                    # a set holding a single combination, to share the
                    # matching loop below with the multi-loci case
                    clusters_combination.append([one_clust_combination])

            for one_combination_set in clusters_combination:
                for one_clust_combination in one_combination_set:
                    ordered_matcher = OrderedMatchMaker(
                        model, redundancy_penalty=redundancy_penalty)
                    res = ordered_matcher.match(one_clust_combination)
                    if isinstance(res, System):
                        systems.append(res)
                    else:
                        rejected_clusters.append(res)

    # Sorting an empty list is a no-op, so no emptiness guard is needed.
    systems.sort(key=lambda syst: (syst.replicon_name, syst.position[0],
                                   syst.model.fqn, -syst.score))
    return systems, rejected_clusters
Пример #19
0
    def test_fill_gembase_min_max_oredered_replicon(self):
        # Filling the DB as a gembase from an ordered-replicon sequence file
        # must raise a MacsypyError with an explicit message.
        seq_ori = self.find_data("base", "ordered_replicon_base.fasta")
        shutil.copy(seq_ori, self.args.out_dir)
        self.args.sequence_db = os.path.join(self.args.out_dir,
                                             os.path.basename(seq_ori))
        cfg = Config(MacsyDefaults(), self.args)

        idx = Indexes(cfg)
        idx.build()
        # fake_init is defined outside this snippet; it replaces the real
        # constructor so that the filling method can be called explicitly.
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        with self.assertRaises(MacsypyError) as ctx:
            # NOTE(review): catch_log() presumably captures the log emitted
            # while the error is raised - confirm. Its value is not used.
            with self.catch_log():
                # Use the configuration built above for this test, the one
                # the RepliconDB itself was created from.
                db._fill_gembase_min_max({}, cfg.replicon_topology())
        self.assertEqual(
            str(ctx.exception),
            f"Error during sequence-db '{self.args.sequence_db}' parsing. "
            f"Are you sure db-type is 'gembase'?")
Пример #20
0
 def test_getitem(self):
     # db[name] must return the stored RepliconInfo for a known replicon
     # and raise KeyError for an unknown one.
     db = RepliconDB(self.cfg)
     known_replicons = [
         ('ESCO030p01', RepliconInfo(self.cfg.replicon_topology, 1, 67,
                                     self.ESCO030p01_genes)),
         ('PSAE001c01', RepliconInfo(self.cfg.replicon_topology, 68, 133,
                                     self.PSAE001c01_genes)),
     ]
     for name, expected_info in known_replicons:
         self.assertEqual(expected_info, db[name])
     with self.assertRaises(KeyError):
         db['foo']
Пример #21
0
 def test_getitem(self):
     # Subscript access returns the RepliconInfo of each of the three
     # known replicons and raises KeyError for an unknown name.
     db = RepliconDB(self.cfg)
     esco_info = RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)
     psae_info = RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)
     nc_info = RepliconInfo("circular", 134, 141, self.NCDB_genes)
     for expected_info, name in ((esco_info, 'ESCO030p01'),
                                 (psae_info, 'PSAE001c01'),
                                 (nc_info, 'NC_xxxxx_xx')):
         self.assertEqual(expected_info, db[name])
     with self.assertRaises(KeyError):
         db['foo']
Пример #22
0
    def test_fill_ordered_replicon_min_max(self):
        # Check that _fill_ordered_min_max() registers a single replicon,
        # stored under RepliconDB.ordered_replicon_name, spanning positions
        # 1 to 52 of "ordered_replicon_base.fasta".
        source_fasta = self.find_data("base", "ordered_replicon_base.fasta")
        # Work on a copy of the sequence file located in the output dir.
        shutil.copy(source_fasta, self.args.out_dir)
        copied_fasta = os.path.join(self.args.out_dir,
                                    os.path.basename(source_fasta))
        self.args.sequence_db = copied_fasta
        cfg = Config(MacsyDefaults(), self.args)

        Indexes(cfg).build()
        # fake_init is defined outside this snippet; it replaces the real
        # constructor so that the filling method can be called explicitly.
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        db._fill_ordered_min_max(cfg.replicon_topology())

        self.assertEqual(len(db._DB), 1)
        replicon = db[RepliconDB.ordered_replicon_name]
        self.assertEqual(replicon.topology, cfg.replicon_topology())
        self.assertEqual(replicon.min, 1)
        self.assertEqual(replicon.max, 52)
Пример #23
0
 def test_fill_gembase_min_max_default_topology(self):
     # With an empty topology mapping the three replicons are expected to
     # carry the topology passed as default (asserted to be 'circular').
     # fake_init is defined outside this snippet; it replaces the real
     # constructor so that the filling method can be called explicitly.
     RepliconDB.__init__ = self.fake_init
     db = RepliconDB(self.cfg)
     db._fill_gembase_min_max({}, self.cfg.replicon_topology)

     self.assertEqual(len(db._DB), 3)
     self.assertEqual(set(db._DB.keys()), {'ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'})
     # (name, min, max, genes) expected for each replicon
     expectations = [
         ('ESCO030p01', 1, 67, self.ESCO030p01_genes),
         ('PSAE001c01', 68, 133, self.PSAE001c01_genes),
         ('NC_xxxxx_xx', 134, 141, self.NCDB_genes),
     ]
     for name, first, last, genes in expectations:
         replicon = db[name]
         self.assertEqual(replicon.topology, 'circular')
         self.assertEqual(replicon.min, first)
         self.assertEqual(replicon.max, last)
         self.assertEqual(replicon.genes, genes)
Пример #24
0
 def test_get(self):
     # get() returns the stored RepliconInfo for a known replicon (even
     # when a default is supplied), None for an unknown one, or the
     # supplied default when one is given.
     db = RepliconDB(self.cfg)
     esco_info = RepliconInfo(self.cfg.replicon_topology, 1, 67, self.ESCO030p01_genes)
     psae_info = RepliconInfo(self.cfg.replicon_topology, 68, 133, self.PSAE001c01_genes)
     nc_info = RepliconInfo("circular", 134, 141, self.NCDB_genes)
     for expected_info, name in ((esco_info, 'ESCO030p01'),
                                 (psae_info, 'PSAE001c01')):
         self.assertEqual(expected_info, db.get(name))
     # the default must be ignored when the replicon exists
     self.assertEqual(nc_info, db.get('NC_xxxxx_xx', 'foo'))
     self.assertIsNone(db.get('foo'))
     self.assertEqual('bar', db.get('foo', 'bar'))
Пример #25
0
 def test_in(self):
     # Membership test: known replicon names are found in the DB, an
     # unknown name is not.
     db = RepliconDB(self.cfg)
     for known_name in ('ESCO030p01', 'PSAE001c01'):
         self.assertIn(known_name, db)
     self.assertNotIn('toto', db)
Пример #26
0
 def test_names(self):
     # replicon_names() must list the replicon names in exactly this order.
     replicon_db = RepliconDB(self.cfg)
     self.assertListEqual(replicon_db.replicon_names(),
                          ['ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'])