示例#1
0
    def test_SystemSerializer_str(self):
        """
        TxtSystemSerializer must render system A as the expected text report.

        Two models are assembled by hand:

        * foo/A: sctN and sctJ mandatory, gspD accessory, abc forbidden;
          each of these genes gets one exchangeable.
        * foo/B: sctN_FLG and sctJ_FLG mandatory, gspD and tadZ accessory.

        System A is made of two clusters and system B of a single one.
        The gspD hit is placed in clusters of both systems, and the
        expected report tags it with ``[sys_id_B]``.
        """
        location = ModelLocation(
            path=os.path.join(self.cfg.models_dir(), 'foo'))
        model_A = Model("foo/A", 10)
        model_B = Model("foo/B", 10)
        mandatory = GeneStatus.MANDATORY
        accessory = GeneStatus.ACCESSORY

        def core(name):
            # every core gene shares the same location and profile factory
            return CoreGene(location, name, self.profile_factory)

        def exchangeable_gene(name, alternate):
            # gene of model A for which the core gene *alternate*
            # is registered as an exchangeable
            core_gene = core(name)
            model_gene = ModelGene(core_gene, model_A)
            model_gene.add_exchangeable(Exchangeable(alternate, model_gene))
            return core_gene, model_gene

        def hit(core_gene, hit_id):
            # the hits differ only by their gene and their identifier
            return Hit(core_gene, hit_id, 803, "replicon_id", 1, 1.0, 1.0,
                       1.0, 1.0, 10, 20)

        def cluster(model, *members):
            # each (hit, gene, status) triplet is wrapped in a new ValidHit
            valid_hits = [ValidHit(h, gene, status)
                          for h, gene, status in members]
            return Cluster(valid_hits, model, self.hit_weights)

        # genes bound to model B
        core_sctn_flg = core("sctN_FLG")
        sctn_flg = ModelGene(core_sctn_flg, model_B)
        core_sctj_flg = core("sctJ_FLG")
        sctj_flg = ModelGene(core_sctj_flg, model_B)
        core_flgB = core("flgB")
        core_tadZ = core("tadZ")
        tadZ = ModelGene(core_tadZ, model_B)

        # genes bound to model A, each one with its exchangeable
        core_sctn, sctn = exchangeable_gene("sctN", core_sctn_flg)
        core_sctj, sctj = exchangeable_gene("sctJ", core_sctj_flg)
        core_gspd, gspd = exchangeable_gene("gspD", core_flgB)
        _, abc = exchangeable_gene("abc", core_tadZ)

        for gene in (sctn, sctj):
            model_A.add_mandatory_gene(gene)
        model_A.add_accessory_gene(gspd)
        model_A.add_forbidden_gene(abc)

        for gene in (sctn_flg, sctj_flg):
            model_B.add_mandatory_gene(gene)
        # gspd (a gene built for model A) is also declared in model B
        for gene in (gspd, tadZ):
            model_B.add_accessory_gene(gene)

        h_sctj = hit(core_sctj, "hit_sctj")
        h_sctn = hit(core_sctn, "hit_sctn")
        h_gspd = hit(core_gspd, "hit_gspd")
        h_sctj_flg = hit(core_sctj_flg, "hit_sctj_flg")
        h_tadZ = hit(core_tadZ, "hit_tadZ")

        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        c1 = cluster(model_A,
                     (h_sctj, sctj, mandatory),
                     (h_sctn, sctn, mandatory),
                     (h_gspd, gspd, accessory))
        c2 = cluster(model_A,
                     (h_sctj, sctj, mandatory),
                     (h_sctn, sctn, mandatory))

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c3 = cluster(model_B,
                     (h_sctj_flg, sctj_flg, mandatory),
                     (h_tadZ, tadZ, accessory),
                     (h_gspd, gspd, accessory))

        # the ids are forced so that the expected text is stable
        sys_A = System(model_A, [c1, c2], self.cfg.redundancy_penalty())
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c3], self.cfg.redundancy_penalty())
        sys_B.id = "sys_id_B"
        tracker = HitSystemTracker([sys_A, sys_B])
        serializer = TxtSystemSerializer()

        clusters_line = ("clusters = "
                         "[('hit_sctj', 'sctJ', 1), ('hit_sctn', 'sctN', 1), "
                         "('hit_gspd', 'gspD', 1)], "
                         "[('hit_sctj', 'sctJ', 1), ('hit_sctn', 'sctN', 1)]")
        # the trailing empty item gives the final newline of the report
        expected = "\n".join([
            f"system id = {sys_A.id}",
            "model = foo/A",
            "replicon = replicon_id",
            clusters_line,
            "occ = 2",
            "wholeness = 1.000",
            "loci nb = 2",
            "score = 1.500",
            "",
            "mandatory genes:",
            "\t- sctN: 2 (sctN, sctN)",
            "\t- sctJ: 2 (sctJ, sctJ)",
            "",
            "accessory genes:",
            "\t- gspD: 1 (gspD [sys_id_B])",
            "",
            "neutral genes:",
            "",
        ])
        self.assertEqual(expected, serializer.serialize(sys_A, tracker))
示例#2
0
    def test_SolutionSerializer_tsv(self):
        """
        TsvSolutionSerializer must render a two-system solution as TSV.

        Models foo/A (sctN, sctJ mandatory; gspD accessory; abc forbidden)
        and foo/B (sctN_FLG, sctJ_FLG mandatory; flgB, tadZ accessory) are
        assembled by hand.  System A holds two clusters and system B one.
        The expected text contains one tab-separated line per hit of each
        cluster, and an empty line after each system.
        """
        location = ModelLocation(
            path=os.path.join(self.cfg.models_dir(), 'foo'))
        model_A = Model("foo/A", 10)
        model_B = Model("foo/B", 10)
        mandatory = GeneStatus.MANDATORY
        accessory = GeneStatus.ACCESSORY

        def core(name):
            # every core gene shares the same location and profile factory
            return CoreGene(location, name, self.profile_factory)

        def exchangeable_gene(name, alternate):
            # gene of model A for which the core gene *alternate*
            # is registered as an exchangeable
            core_gene = core(name)
            model_gene = ModelGene(core_gene, model_A)
            model_gene.add_exchangeable(Exchangeable(alternate, model_gene))
            return core_gene, model_gene

        def hit(core_gene, hit_id):
            # the hits differ only by their gene and their identifier
            return Hit(core_gene, hit_id, 803, "replicon_id", 1, 1.0, 1.0,
                       1.0, 1.0, 10, 20)

        def cluster(model, *members):
            # each (hit, gene, status) triplet is wrapped in a new ValidHit
            valid_hits = [ValidHit(h, gene, status)
                          for h, gene, status in members]
            return Cluster(valid_hits, model, self.hit_weights)

        # genes bound to model B
        core_sctn_flg = core("sctN_FLG")
        sctn_flg = ModelGene(core_sctn_flg, model_B)
        core_sctj_flg = core("sctJ_FLG")
        sctj_flg = ModelGene(core_sctj_flg, model_B)
        core_flgB = core("flgB")
        flgB = ModelGene(core_flgB, model_B)
        core_tadZ = core("tadZ")
        tadZ = ModelGene(core_tadZ, model_B)

        # genes bound to model A, each one with its exchangeable
        core_sctn, sctn = exchangeable_gene("sctN", core_sctn_flg)
        core_sctj, sctj = exchangeable_gene("sctJ", core_sctj_flg)
        core_gspd, gspd = exchangeable_gene("gspD", core_flgB)
        _, abc = exchangeable_gene("abc", core_tadZ)

        for gene in (sctn, sctj):
            model_A.add_mandatory_gene(gene)
        model_A.add_accessory_gene(gspd)
        model_A.add_forbidden_gene(abc)

        for gene in (sctn_flg, sctj_flg):
            model_B.add_mandatory_gene(gene)
        for gene in (flgB, tadZ):
            model_B.add_accessory_gene(gene)

        h_sctj = hit(core_sctj, "hit_sctj")
        h_sctn = hit(core_sctn, "hit_sctn")
        h_gspd = hit(core_gspd, "hit_gspd")
        h_sctj_flg = hit(core_sctj_flg, "hit_sctj_flg")
        h_flgB = hit(core_flgB, "hit_flgB")
        h_tadZ = hit(core_tadZ, "hit_tadZ")

        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        c1 = cluster(model_A,
                     (h_sctj, sctj, mandatory),
                     (h_sctn, sctn, mandatory),
                     (h_gspd, gspd, accessory))
        c2 = cluster(model_A,
                     (h_sctj, sctj, mandatory),
                     (h_sctn, sctn, mandatory))

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c3 = cluster(model_B,
                     (h_sctj_flg, sctj_flg, mandatory),
                     (h_tadZ, tadZ, accessory),
                     (h_flgB, flgB, accessory))

        # the ids are forced so that the expected text is stable
        sys_A = System(model_A, [c1, c2], self.cfg.redundancy_penalty())
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c3], self.cfg.redundancy_penalty())
        sys_B.id = "sys_id_B"

        solution = [sys_A, sys_B]
        sol_id = '12'

        tracker = HitSystemTracker([sys_A, sys_B])
        serializer = TsvSolutionSerializer()

        def tsv_row(hit_id, gene_name, status, system_cols):
            # one expected line, newline included; the six values of
            # *system_cols* are the fields shared by all hits of a system
            model_fqn, sys_id, loci, wholeness, score, occ = system_cols
            fields = [
                sol_id, 'replicon_id', hit_id, gene_name, '1',
                model_fqn, sys_id, loci, wholeness, score, occ,
                gene_name, status,
                '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '',
            ]
            return '\t'.join(fields) + "\n"

        cols_A = ('foo/A', 'sys_id_A', '2', '1.000', '1.500', '2')
        cols_B = ('foo/B', 'sys_id_B', '1', '0.750', '2.000', '1')
        rows_A = [
            ('hit_sctj', 'sctJ', 'mandatory'),
            ('hit_sctn', 'sctN', 'mandatory'),
            ('hit_gspd', 'gspD', 'accessory'),
            ('hit_sctj', 'sctJ', 'mandatory'),
            ('hit_sctn', 'sctN', 'mandatory'),
        ]
        rows_B = [
            ('hit_sctj_flg', 'sctJ_FLG', 'mandatory'),
            ('hit_tadZ', 'tadZ', 'accessory'),
            ('hit_flgB', 'flgB', 'accessory'),
        ]

        chunks = []
        for system_cols, rows in ((cols_A, rows_A), (cols_B, rows_B)):
            chunks.extend(tsv_row(hit_id, gene_name, status, system_cols)
                          for hit_id, gene_name, status in rows)
            # an empty line closes each system
            chunks.append("\n")
        expected = "".join(chunks)

        serialized = serializer.serialize(solution, sol_id, tracker)
        self.assertEqual(serialized, expected)
示例#3
0
    def test_solutions_to_tsv(self):
        """
        solutions_to_tsv must write the banner, the column header and one
        line per hit for every system of every solution.

        Three models (foo/A, foo/B, foo/C) and their systems are assembled
        by hand.  Solution '1' is [A, B] and solution '2' is [A, C].  The
        hit tracker is built from systems A and B only; in the expected
        text the last column of the lines of system C holds 'sys_id_B' or
        'sys_id_A', while it is empty on all the other lines.
        """
        args = argparse.Namespace(
            sequence_db=self.find_data("base", "test_1.fasta"),
            db_type='gembase',
            models_dir=self.find_data('models'))
        cfg = Config(MacsyDefaults(), args)
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, 'foo'))

        # The ProfileFactory has to be reset because it behaves like a
        # singleton: other tests are influenced by the ProfileFactory and
        # its configuration (for instance search_genes gets a profile
        # without hmmer_exe).
        profile_factory = ProfileFactory(cfg)

        model_A = Model("foo/A", 10)
        model_B = Model("foo/B", 10)
        model_C = Model("foo/C", 10)
        mandatory = GeneStatus.MANDATORY
        accessory = GeneStatus.ACCESSORY

        def core(name):
            # every core gene shares the same location and profile factory
            return CoreGene(models_location, name, profile_factory)

        def exchangeable_gene(name, alternate):
            # gene of model A for which the core gene *alternate*
            # is registered as an exchangeable
            core_gene = core(name)
            model_gene = ModelGene(core_gene, model_A)
            model_gene.add_exchangeable(Exchangeable(alternate, model_gene))
            return core_gene, model_gene

        def hit(core_gene, hit_id):
            # the hits differ only by their gene and their identifier
            return Hit(core_gene, hit_id, 803, "replicon_id", 1, 1.0, 1.0,
                       1.0, 1.0, 10, 20)

        # genes bound to model B
        core_sctn_flg = core("sctN_FLG")
        sctn_flg = ModelGene(core_sctn_flg, model_B)
        core_sctj_flg = core("sctJ_FLG")
        sctj_flg = ModelGene(core_sctj_flg, model_B)
        core_flgB = core("flgB")
        flgB = ModelGene(core_flgB, model_B)
        core_tadZ = core("tadZ")
        tadZ = ModelGene(core_tadZ, model_B)

        # genes bound to model A, each one with its exchangeable
        core_sctn, sctn = exchangeable_gene("sctN", core_sctn_flg)
        core_sctj, sctj = exchangeable_gene("sctJ", core_sctj_flg)
        core_gspd, gspd = exchangeable_gene("gspD", core_flgB)
        _, abc = exchangeable_gene("abc", core_tadZ)

        for gene in (sctn, sctj):
            model_A.add_mandatory_gene(gene)
        model_A.add_accessory_gene(gspd)
        model_A.add_forbidden_gene(abc)

        for gene in (sctn_flg, sctj_flg):
            model_B.add_mandatory_gene(gene)
        for gene in (flgB, tadZ):
            model_B.add_accessory_gene(gene)

        # model C is declared with genes created for models A and B
        for gene in (sctn_flg, sctj_flg, flgB):
            model_C.add_mandatory_gene(gene)
        for gene in (tadZ, gspd):
            model_C.add_accessory_gene(gene)

        h_sctj = hit(core_sctj, "hit_sctj")
        h_sctn = hit(core_sctn, "hit_sctn")
        h_gspd = hit(core_gspd, "hit_gspd")
        h_sctj_flg = hit(core_sctj_flg, "hit_sctj_flg")
        h_flgB = hit(core_flgB, "hit_flgB")
        h_tadZ = hit(core_tadZ, "hit_tadZ")

        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        hit_weights = HitWeight(**cfg.hit_weights())

        def cluster(model, *members):
            # each (hit, gene, status) triplet is wrapped in a new ValidHit
            valid_hits = [ValidHit(h, gene, status)
                          for h, gene, status in members]
            return Cluster(valid_hits, model, hit_weights)

        c1 = cluster(model_A,
                     (h_sctj, sctj, mandatory),
                     (h_sctn, sctn, mandatory),
                     (h_gspd, gspd, accessory))
        c2 = cluster(model_A,
                     (h_sctj, sctj, mandatory),
                     (h_sctn, sctn, mandatory))

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c3 = cluster(model_B,
                     (h_sctj_flg, sctj_flg, mandatory),
                     (h_tadZ, tadZ, accessory),
                     (h_flgB, flgB, accessory))

        model_C._min_mandatory_genes_required = 1
        model_C._min_genes_required = 2
        c4 = cluster(model_C,
                     (h_sctj_flg, sctj_flg, mandatory),
                     (h_tadZ, tadZ, accessory),
                     (h_flgB, flgB, mandatory),
                     (h_gspd, gspd, accessory))

        # the ids are forced so that the expected text is stable
        sys_A = System(model_A, [c1, c2], cfg.redundancy_penalty())
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c3], cfg.redundancy_penalty())
        sys_B.id = "sys_id_B"
        sys_C = System(model_C, [c4], cfg.redundancy_penalty())
        sys_C.id = "sys_id_C"

        sol_1 = [sys_A, sys_B]
        sol_2 = [sys_A, sys_C]

        def tsv_row(sol_id, hit_id, gene_name, status, used_in, system_cols):
            # one expected line, newline included; the six values of
            # *system_cols* are the fields shared by all hits of a system
            model_fqn, sys_id, loci, wholeness, score, occ = system_cols
            fields = [
                sol_id, 'replicon_id', hit_id, gene_name, '1',
                model_fqn, sys_id, loci, wholeness, score, occ,
                gene_name, status,
                '803', '1.0', '1.000', '1.000', '1.000', '10', '20', used_in,
            ]
            return '\t'.join(fields) + "\n"

        banner = (f"# macsyfinder {macsypy.__version__}\n"
                  f"# {' '.join(sys.argv)}\n"
                  "# Systems found:\n")
        columns = [
            "sol_id", "replicon", "hit_id", "gene_name", "hit_pos",
            "model_fqn", "sys_id", "sys_loci", "sys_wholeness", "sys_score",
            "sys_occ", "hit_gene_ref", "hit_status", "hit_seq_len",
            "hit_i_eval", "hit_score", "hit_profile_cov", "hit_seq_cov",
            "hit_begin_match", "hit_end_match", "used_in",
        ]

        cols_A = ('foo/A', 'sys_id_A', '2', '1.000', '1.500', '2')
        cols_B = ('foo/B', 'sys_id_B', '1', '0.750', '2.000', '1')
        cols_C = ('foo/C', 'sys_id_C', '1', '0.800', '3.000', '1')
        # (hit_id, gene_name, hit_status, used_in)
        rows_A = [
            ('hit_sctj', 'sctJ', 'mandatory', ''),
            ('hit_sctn', 'sctN', 'mandatory', ''),
            ('hit_gspd', 'gspD', 'accessory', ''),
            ('hit_sctj', 'sctJ', 'mandatory', ''),
            ('hit_sctn', 'sctN', 'mandatory', ''),
        ]
        rows_B = [
            ('hit_sctj_flg', 'sctJ_FLG', 'mandatory', ''),
            ('hit_tadZ', 'tadZ', 'accessory', ''),
            ('hit_flgB', 'flgB', 'accessory', ''),
        ]
        rows_C = [
            ('hit_sctj_flg', 'sctJ_FLG', 'mandatory', 'sys_id_B'),
            ('hit_tadZ', 'tadZ', 'accessory', 'sys_id_B'),
            ('hit_flgB', 'flgB', 'mandatory', 'sys_id_B'),
            ('hit_gspd', 'gspD', 'accessory', 'sys_id_A'),
        ]
        # (solution id, system columns, hit rows) in output order
        layout = [
            ('1', cols_A, rows_A),
            ('1', cols_B, rows_B),
            ('2', cols_A, rows_A),
            ('2', cols_C, rows_C),
        ]

        chunks = [banner, "\t".join(columns), "\n"]
        for sol_id, system_cols, rows in layout:
            chunks.extend(
                tsv_row(sol_id, hit_id, gene_name, status, used_in,
                        system_cols)
                for hit_id, gene_name, status, used_in in rows)
            # an empty line closes each system
            chunks.append("\n")
        expected = "".join(chunks)

        out = StringIO()
        tracker = HitSystemTracker([sys_A, sys_B])
        solutions_to_tsv([sol_1, sol_2], tracker, out)
        self.assertMultiLineEqual(expected, out.getvalue())
示例#4
0
    def test_SolutionSerializer_tsv(self):
        """
        TsvSolutionSerializer must render a Solution of two systems as TSV.

        Model foo/B (sctN_FLG, sctJ_FLG mandatory; flgB, tadZ accessory)
        and model foo/A (sctN, sctJ mandatory; gspD and the loner gene abc
        accessory) are assembled by hand.  System A is made of two regular
        clusters plus a cluster holding a single Loner, system B of one
        cluster.  The expected text contains one tab-separated line per
        hit and an empty line after each system.
        """
        location = ModelLocation(
            path=os.path.join(self.cfg.models_dir()[0], 'foo'))
        mandatory = GeneStatus.MANDATORY
        accessory = GeneStatus.ACCESSORY

        def core(name):
            # every core gene shares the same location and profile factory
            return CoreGene(location, name, self.profile_factory)

        # ---------------- Model B ----------------
        model_B = Model("foo/B", 10)
        core_sctn_flg = core("sctN_FLG")
        sctn_flg = ModelGene(core_sctn_flg, model_B)
        core_sctj_flg = core("sctJ_FLG")
        sctj_flg = ModelGene(core_sctj_flg, model_B)
        core_flgB = core("flgB")
        flgB = ModelGene(core_flgB, model_B)
        core_tadZ = core("tadZ")
        tadZ = ModelGene(core_tadZ, model_B)

        for gene in (sctn_flg, sctj_flg):
            model_B.add_mandatory_gene(gene)
        for gene in (flgB, tadZ):
            model_B.add_accessory_gene(gene)

        # ---------------- Model A ----------------
        model_A = Model("foo/A", 10)

        def exchangeable_gene(name, alternate, **gene_options):
            # gene of model A for which the core gene *alternate*
            # is registered as an exchangeable
            core_gene = core(name)
            model_gene = ModelGene(core_gene, model_A, **gene_options)
            model_gene.add_exchangeable(Exchangeable(alternate, model_gene))
            return core_gene, model_gene

        core_sctn, sctn = exchangeable_gene("sctN", core_sctn_flg)
        core_sctj, sctj = exchangeable_gene("sctJ", core_sctj_flg)
        core_gspd, gspd = exchangeable_gene("gspD", core_flgB)
        core_abc, abc = exchangeable_gene("abc", core_tadZ, loner=True)

        for gene in (sctn, sctj):
            model_A.add_mandatory_gene(gene)
        for gene in (gspd, abc):
            model_A.add_accessory_gene(gene)

        def core_hit(core_gene, hit_id, position):
            # Argument order, as given by the original author's note:
            # CoreHit(gene, hit_id, hit_seq_length, replicon_name, position,
            #         i_eval, score, profile_coverage, sequence_coverage,
            #         begin_match, end_match)
            # Only the gene, the identifier and the position vary here.
            return CoreHit(core_gene, hit_id, 803, "replicon_id", position,
                           1.0, 1.0, 1.0, 1.0, 10, 20)

        h_sctj = core_hit(core_sctj, "hit_sctj", 1)
        mh_sctj = ModelHit(h_sctj, sctj, mandatory)
        h_sctn = core_hit(core_sctn, "hit_sctn", 2)
        mh_sctn = ModelHit(h_sctn, sctn, mandatory)
        h_gspd = core_hit(core_gspd, "hit_gspd", 3)
        mh_gspd = ModelHit(h_gspd, gspd, accessory)

        h_sctj_flg = core_hit(core_sctj_flg, "hit_sctj_flg", 10)
        h_flgB = core_hit(core_flgB, "hit_flgB", 11)
        h_abc = core_hit(core_abc, "hit_abc", 20)
        h_abc2 = core_hit(core_abc, "hit_abc2", 50)
        h_tadZ = core_hit(core_tadZ, "hit_tadZ", 40)
        mh_sctj_flg = ModelHit(h_sctj_flg, sctj_flg, mandatory)
        mh_flgB = ModelHit(h_flgB, flgB, accessory)
        # not referenced afterwards (the Loner below is built from h_abc);
        # kept so that the same objects as before are constructed
        mh_abc = ModelHit(h_abc, abc, accessory)
        mh_abc2 = ModelHit(h_abc2, abc, accessory)
        mh_tadZ = ModelHit(h_tadZ, tadZ, accessory)

        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        # the same ModelHit objects are shared by c1 and c2
        c1 = Cluster([mh_sctj, mh_sctn, mh_gspd], model_A, self.hit_weights)
        c2 = Cluster([mh_sctj, mh_sctn], model_A, self.hit_weights)
        loner_abc = Loner(h_abc,
                          gene_ref=abc,
                          gene_status=accessory,
                          counterpart=[mh_abc2])
        c3 = Cluster([loner_abc], model_A, self.hit_weights)

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c5 = Cluster([mh_sctj_flg, mh_tadZ, mh_flgB], model_B,
                     self.hit_weights)

        # the ids are forced so that the expected text is stable
        sys_A = System(model_A, [c1, c2, c3], self.cfg.redundancy_penalty())
        # original author's note on the score:
        # 2.5, 2, 0.35 = 4.85 - (2 * 1.5) = 1.85
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c5], self.cfg.redundancy_penalty())
        # original author's note on the score: 2.0
        sys_B.id = "sys_id_B"

        solution = Solution([sys_A, sys_B])
        sol_id = '12'

        tracker = HitSystemTracker([sys_A, sys_B])
        serializer = TsvSolutionSerializer()

        def tsv_row(hit_id, gene_name, position, locus, status, counterpart,
                    system_cols):
            # one expected line, newline included; the six values of
            # *system_cols* are the fields shared by all hits of a system.
            # NOTE(review): *locus* and *counterpart* are names inferred
            # from the expected values -- confirm against the serializer.
            model_fqn, sys_id, loci, wholeness, score, occ = system_cols
            fields = [
                sol_id, 'replicon_id', hit_id, gene_name, position,
                model_fqn, sys_id, loci, locus, wholeness, score, occ,
                gene_name, status,
                '803', '1.0', '1.000', '1.000', '1.000', '10', '20',
                counterpart, '',
            ]
            return '\t'.join(fields) + "\n"

        cols_A = ('foo/A', 'sys_id_A', '2', '1.000', '1.850', '2')
        cols_B = ('foo/B', 'sys_id_B', '1', '0.750', '2.000', '1')
        # (hit_id, gene_name, position, locus, status, counterpart)
        rows_A = [
            ('hit_sctj', 'sctJ', '1', '1', 'mandatory', ''),
            ('hit_sctn', 'sctN', '2', '1', 'mandatory', ''),
            ('hit_gspd', 'gspD', '3', '1', 'accessory', ''),
            ('hit_sctj', 'sctJ', '1', '2', 'mandatory', ''),
            ('hit_sctn', 'sctN', '2', '2', 'mandatory', ''),
            ('hit_abc', 'abc', '20', '-1', 'accessory', 'hit_abc2'),
        ]
        rows_B = [
            ('hit_sctj_flg', 'sctJ_FLG', '10', '1', 'mandatory', ''),
            ('hit_flgB', 'flgB', '11', '1', 'accessory', ''),
            ('hit_tadZ', 'tadZ', '40', '1', 'accessory', ''),
        ]

        chunks = []
        for system_cols, rows in ((cols_A, rows_A), (cols_B, rows_B)):
            chunks.extend(tsv_row(*row, system_cols) for row in rows)
            # an empty line closes each system
            chunks.append("\n")
        expected = "".join(chunks)

        serialized = serializer.serialize(solution, sol_id, tracker)
        self.maxDiff = None
        self.assertEqual(serialized, expected)
示例#5
0
def _build_systems(models, clusters, cfg):
    """
    Assemble the systems 'A' to 'K' from already built models and clusters.

    :param models: mapping whose keys 'A' to 'K' give the model of each system
    :param clusters: mapping whose keys 'c1' to 'c10' give the clusters
    :param cfg: object providing ``redundancy_penalty()``, which is called
                once for each system
    :return: dict mapping each key 'A'..'K' to its System
    """
    # (system key, keys of its clusters); the hit counts in the trailing
    # comments are the ones noted by the original author
    layout = (
        ('A', ('c1', 'c2')),  # 5 hits
        ('B', ('c3',)),       # 3 hits
        ('C', ('c4',)),       # 4 hits
        ('D', ('c5',)),       # 2 hits
        ('E', ('c6',)),       # 1 hit
        ('F', ('c7',)),       # 1 hit
        ('G', ('c4',)),       # 4 hits
        ('H', ('c5',)),       # 2 hits
        ('I', ('c8',)),       # 2 hits
        ('J', ('c9',)),       # 2 hits
        ('K', ('c10',)),      # 2 hits
    )

    systems = {}
    for key, cluster_keys in layout:
        system = System(models[key],
                        [clusters[name] for name in cluster_keys],
                        cfg.redundancy_penalty())
        systems[key] = system
        # The id is forced to get stable results whatever the number of
        # tests run or the order in which they run.
        system.id = f"replicon_id_{key}"
    return systems
示例#6
0
def _build_systems(cfg, profile_factory):
    """
    Build from scratch the System fixtures 'A' to 'H'.

    Models, core genes, model genes, exchangeables, hits and clusters are
    all created here, then combined into systems.

    :param cfg: configuration object; this function calls its models_dir(),
                hit_weights() and redundancy_penalty() methods.
    :param profile_factory: object handed over to every CoreGene constructor.
    :return: a dict mapping each letter 'A' to 'H' to its System; the id of
             each system is overwritten with "replicon_id_<letter>".
    """
    location = ModelLocation(path=os.path.join(cfg.models_dir(), 'foo'))
    models = {letter: Model("foo/" + letter, 10) for letter in "ABCDEFGH"}

    core = {}  # gene name -> CoreGene
    gene = {}  # gene name -> ModelGene

    def declare_gene(name, letter):
        # create the core gene, then the model gene bound to model <letter>
        core[name] = CoreGene(location, name, profile_factory)
        gene[name] = ModelGene(core[name], models[letter])

    # genes bound to model B
    for name in ("sctN_FLG", "sctJ_FLG", "flgB", "tadZ"):
        declare_gene(name, 'B')

    # genes bound to model A, each one with one exchangeable
    # built on the core gene of a gene of model B
    for name, alternate in (("sctN", "sctN_FLG"),
                            ("sctJ", "sctJ_FLG"),
                            ("gspD", "flgB"),
                            ("abc", "tadZ")):
        declare_gene(name, 'A')
        gene[name].add_exchangeable(Exchangeable(core[alternate], gene[name]))

    def fill_like_c(model):
        # gene content shared by models C and G
        for name in ("sctN_FLG", "sctJ_FLG", "flgB"):
            model.add_mandatory_gene(gene[name])
        for name in ("tadZ", "gspD"):
            model.add_accessory_gene(gene[name])

    def fill_like_d(model):
        # gene content shared by models D and H
        model.add_mandatory_gene(gene["abc"])
        model.add_accessory_gene(gene["sctN"])

    # The models are filled in alphabetic order, do not reorder:
    # the same gene objects are added to several models.
    for name in ("sctN", "sctJ"):
        models['A'].add_mandatory_gene(gene[name])
    models['A'].add_accessory_gene(gene["gspD"])
    models['A'].add_forbidden_gene(gene["abc"])

    for name in ("sctN_FLG", "sctJ_FLG"):
        models['B'].add_mandatory_gene(gene[name])
    for name in ("flgB", "tadZ"):
        models['B'].add_accessory_gene(gene[name])

    fill_like_c(models['C'])
    fill_like_d(models['D'])
    models['E'].add_accessory_gene(gene["gspD"])
    models['F'].add_mandatory_gene(gene["abc"])
    fill_like_c(models['G'])  # idem as C
    fill_like_d(models['H'])  # idem as D

    # One hit per gene (there is none for sctN_FLG). Only the core gene,
    # the hit name and the fifth argument (1 to 7) differ between the hits.
    hit = {}  # gene name -> Hit
    hit_names = (("sctJ", "hit_sctj"),
                 ("sctN", "hit_sctn"),
                 ("gspD", "hit_gspd"),
                 ("sctJ_FLG", "hit_sctj_flg"),
                 ("flgB", "hit_flgB"),
                 ("tadZ", "hit_tadZ"),
                 ("abc", "hit_abc"))
    for index, (name, hit_name) in enumerate(hit_names, start=1):
        hit[name] = Hit(core[name], hit_name, 803, "replicon_id", index,
                        1.0, 1.0, 1.0, 1.0, 10, 20)

    def require(letter, min_mandatory, min_genes):
        # overwrite the two private minimum-genes attributes of model <letter>
        models[letter]._min_mandatory_genes_required = min_mandatory
        models[letter]._min_genes_required = min_genes

    def new_cluster(letter, weights, *members):
        # members are (gene name, status) pairs;
        # the hit and the model gene of a ValidHit always share the same name
        valid_hits = [ValidHit(hit[name], gene[name], status)
                      for name, status in members]
        return Cluster(valid_hits, models[letter], weights)

    require('A', 2, 2)
    hit_weights = HitWeight(**cfg.hit_weights())
    c1 = new_cluster('A', hit_weights,
                     ("sctJ", GeneStatus.MANDATORY),
                     ("sctN", GeneStatus.MANDATORY),
                     ("gspD", GeneStatus.ACCESSORY))
    c2 = new_cluster('A', hit_weights,
                     ("sctJ", GeneStatus.MANDATORY),
                     ("sctN", GeneStatus.MANDATORY))

    require('B', 1, 2)
    c3 = new_cluster('B', hit_weights,
                     ("sctJ_FLG", GeneStatus.MANDATORY),
                     ("tadZ", GeneStatus.ACCESSORY),
                     ("flgB", GeneStatus.ACCESSORY))

    require('C', 1, 2)
    c4 = new_cluster('C', hit_weights,
                     ("sctJ_FLG", GeneStatus.MANDATORY),
                     ("tadZ", GeneStatus.ACCESSORY),
                     ("flgB", GeneStatus.MANDATORY),
                     ("gspD", GeneStatus.ACCESSORY))

    require('D', 1, 1)
    c5 = new_cluster('D', hit_weights,
                     ("abc", GeneStatus.MANDATORY),
                     ("sctN", GeneStatus.ACCESSORY))

    require('E', 0, 1)
    c6 = new_cluster('E', hit_weights, ("gspD", GeneStatus.ACCESSORY))

    require('F', 1, 1)
    c7 = new_cluster('F', hit_weights, ("abc", GeneStatus.MANDATORY))

    # Systems G and H reuse the clusters c4 and c5 (built with models C and D);
    # the minimum-genes attributes of models G and H are not overwritten.
    composition = (
        ('A', [c1, c2]),  # 5 hits
        ('B', [c3]),      # 3 hits
        ('C', [c4]),      # 4 hits
        ('D', [c5]),      # 2 hits
        ('E', [c6]),      # 1 hit
        ('F', [c7]),      # 1 hit
        ('G', [c4]),      # 4 hits
        ('H', [c5]),      # 2 hits
    )

    systems = {}
    for letter, system_clusters in composition:
        system = System(models[letter], system_clusters,
                        cfg.redundancy_penalty())
        # we need to tweak the id to have stable results
        # whatever the number of tests ran or the tests order
        system.id = "replicon_id_" + letter
        systems[letter] = system

    return systems