Пример #1
0
    def test_classification_with_colon(self):
        """Check that a hit description containing ':' is handled properly.

        The SMCOG id and its description are stored in one string separated
        by ':', so a description that itself contains ':' must not break
        classification or annotation. The test gene is AQF52_5530 from
        CP013129.1.
        """
        protein = "".join((
            "MDTHQREEDPVAARRDRTHYLYLAVIGAVLLGIAVGFLAPGVAVELKPLGTGFVN",
            "LIKMMISPIIFCTIVLGVGSVRKAAKVGAVGGLALGYFLVMSTVALAIGLLVGNL",
            "LEPGSGLHLTKEIAEAGAKQAEGGGESTPDFLLGIIPTTFVSAFTEGEVLQTLLV",
            "ALLAGFALQAMGAAGEPVLRGIGHIQRLVFRILGMIMWVAPVGAFGAIAAVVGAT",
            "GAAALKSLAVIMIGFYLTCGLFVFVVLGAVLRLVAGINIWTLLRYLGREFLLILS",
            "TSSSESALPRLIAKMEHLGVSKPVVGITVPTGYSFNLDGTAIYLTMASLFVAEAM",
            "GDPLSIGEQISLLVFMIIASKGAAGVTGAGLATLAGGLQSHRPELVDGVGLIVGI",
            "DRFMSEARALTNFAGNAVATVLVGTWTKEIDKARVTEVLAGNIPFDEKTLVDDHA",
            "PVPVPDQRAEGGEEKARAGV",
        ))
        expected_hit = "SMCOG1212:sodium:dicarboxylate symporter"
        expected_prefix = (
            "transport (smcogs) SMCOG1212:sodium:dicarboxylate symporter"
            " (Score: 416; E-value: 2.3e-126)"
        )

        # a single CDS spanning the whole translation
        cds = helpers.DummyCDS(0, len(protein))
        cds.translation = protein
        results = smcogs.classify("test", [cds], get_config())
        best_hit = results.best_hits[cds.get_name()]
        assert best_hit.hit_id == expected_hit

        record = helpers.DummyRecord(seq=protein)
        record.add_cds_feature(cds)
        record.add_protocluster(helpers.DummyProtocluster(0, len(protein)))

        # if multiple colons in the hit id aren't handled right,
        # this call will crash
        results.add_to_record(record)
        smcog_functions = cds.gene_functions.get_by_tool("smcogs")
        assert len(smcog_functions) == 1
        assert str(smcog_functions[0]).startswith(expected_prefix)
Пример #2
0
    def test_annotations(self):
        """Check each CDS gene function against the smcogs function mapping.

        CDS features that carry a "rule-based-clusters" gene function are
        skipped; every other CDS must report the function mapped to its name,
        or GeneFunction.OTHER when the mapping has no entry for it.
        """
        record_id = self.record.id
        features = self.record.get_cds_features()
        results = smcogs.classify(record_id, features, self.options)
        results.add_to_record(self.record)

        for feature in self.record.get_cds_features():
            rule_based = feature.gene_functions.get_by_tool(
                "rule-based-clusters")
            if not rule_based:
                actual = feature.gene_function
                expected = results.function_mapping.get(
                    feature.get_name(), GeneFunction.OTHER)
                assert actual == expected
Пример #3
0
    def test_results_reconstruction(self):
        """Check that results are unchanged by a to_json/from_json cycle.

        The classification results are serialised, rebuilt through
        core.FunctionResults.from_json, and the rebuilt object must expose
        the same tool name and best hit, and serialise to identical JSON.
        """
        expected_hit = 'SMCOG1155:Lantibiotic dehydratase domain protein'

        original = smcogs.classify(
            self.record.id, self.record.get_cds_features(), self.options)
        assert original.tool == "smcogs"
        assert original.best_hits["nisB"].hit_id == expected_hit

        serialised = original.to_json()
        # the first element of the serialised hit is compared with the hit id
        assert serialised["best_hits"]["nisB"][0] == expected_hit

        rebuilt = core.FunctionResults.from_json(serialised, self.record)
        assert rebuilt.tool == "smcogs"
        assert rebuilt.best_hits["nisB"].hit_id == expected_hit
        # serialising the rebuilt results must give back the same JSON
        assert rebuilt.to_json() == serialised