def list_models(args):
    """
    Render every installed model (and its submodels) as text.

    A configuration is built from the parsed command line, the configured
    models directory is scanned, every model location found is added to a
    fresh registry and the registry's string form is returned.

    :param args: The command line argument once parsed
    :type args: :class:`argparse.Namespace` object
    :return: a string representation of all models and submodels installed.
    :rtype: str
    """
    cfg = Config(MacsyDefaults(), args)
    model_registry = ModelRegistry()
    # the scan is driven entirely by the options resolved in the configuration
    found_locations = scan_models_dir(cfg.models_dir(),
                                      profile_suffix=cfg.profile_suffix(),
                                      relative_path=cfg.relative_path())
    for location in found_locations:
        model_registry.add(location)
    return str(model_registry)
def _find_all_installed_packages() -> ModelRegistry:
    """
    Collect the models found in the configured models directory and,
    when it exists, in the per-user directory ``~/.macsyfinder/data``.

    A directory which cannot be read (:class:`PermissionError`) is reported
    with a warning and skipped, the other directories are still scanned.

    :return: all models installed
    :rtype: :class:`ModelRegistry` object
    """
    defaults = MacsyDefaults()
    config = Config(defaults, argparse.Namespace())
    system_model_dir = config.models_dir()
    user_model_dir = os.path.join(os.path.expanduser('~'), '.macsyfinder', 'data')
    # the user directory is optional: only scan it if it is present
    if os.path.exists(user_model_dir):
        model_dirs = (system_model_dir, user_model_dir)
    else:
        model_dirs = (system_model_dir,)
    # Configuration options are accessors that must be *called* to obtain the value.
    # Passing the accessor itself handed a callable to scan_models_dir
    # instead of the suffix string. Resolve it once, outside the loop.
    profile_suffix = config.profile_suffix()
    registry = ModelRegistry()
    for model_dir in model_dirs:
        try:
            for model_loc in scan_models_dir(model_dir, profile_suffix=profile_suffix):
                registry.add(model_loc)
        except PermissionError as err:
            _log.warning(f"{model_dir} is not readable: {err} : skip it.")
    return registry
def _find_all_installed_packages(models_dir=None) -> ModelRegistry:
    """
    Scan every configured models directory and register what is found.

    :param models_dir: when not None, it is set as the ``models_dir`` option
                       of the configuration used for the scan.
    :return: all models installed
    :rtype: :class:`ModelRegistry` object
    """
    namespace = argparse.Namespace()
    if models_dir is not None:
        namespace.models_dir = models_dir
    cfg = Config(MacsyDefaults(), namespace)
    search_dirs = cfg.models_dir()
    installed = ModelRegistry()
    for directory in search_dirs:
        try:
            locations = scan_models_dir(directory,
                                        profile_suffix=cfg.profile_suffix())
            for location in locations:
                installed.add(location)
        except PermissionError as err:
            # an unreadable directory must not prevent scanning the other ones
            _log.warning(f"{directory} is not readable: {err} : skip it.")
    return installed
class TestModelParser(MacsyTest):
    """
    Tests of :class:`DefinitionParser` against the model definitions
    of the 'foo' family found in the 'models' test data directory.
    """

    def setUp(self):
        """Build a configuration, empty banks, a model registry and a parser."""
        defaults = MacsyDefaults()
        self.args = argparse.Namespace()
        self.args.sequence_db = self.find_data("base", "test_1.fasta")
        self.args.db_type = 'gembase'
        self.args.models_dir = self.find_data('models')
        self.args.res_search_dir = tempfile.gettempdir()
        self.cfg = Config(defaults, self.args)
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.profile_factory = ProfileFactory(self.cfg)
        self.model_registry = ModelRegistry()
        models_location = scan_models_dir(self.args.models_dir)
        for ml in models_location:
            self.model_registry.add(ml)
        self.parser = DefinitionParser(self.cfg, self.model_bank, self.gene_bank,
                                       self.model_registry, self.profile_factory)

    def tearDown(self):
        """Remove the working directory, if any (best effort)."""
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # Cleanup stays best effort, but unlike a bare 'except:'
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            pass

    def _rebuild_parser(self):
        """
        Recreate the configuration from the current ``self.args``, then fresh
        banks, registry and parser, so that options added to ``self.args``
        after :meth:`setUp` are taken into account.
        The profile factory created in :meth:`setUp` is reused.
        """
        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.model_registry = ModelRegistry()
        models_location = scan_models_dir(self.args.models_dir)
        for ml in models_location:
            self.model_registry.add(ml)
        self.parser = DefinitionParser(self.cfg, self.model_bank, self.gene_bank,
                                       self.model_registry, self.profile_factory)

    def test_parse_with_exchangeable(self):
        model_name = 'model_1'
        model_family = 'foo'
        fqn = f"{model_family}/{model_name}"
        models_2_detect = [self.model_registry['foo'].get_definition(fqn)]
        self.parser.parse(models_2_detect)
        self.assertEqual(len(self.model_bank), 1)

        m1 = self.model_bank[fqn]
        self.assertEqual(m1.name, model_name)
        self.assertEqual(m1.fqn, fqn)
        self.assertEqual(m1.inter_gene_max_space, 20)
        self.assertEqual(m1.min_mandatory_genes_required, 2)
        self.assertEqual(m1.min_genes_required, 4)
        self.assertTrue(m1.multi_loci)

        self.assertEqual(len(m1.mandatory_genes), 2)
        mandatory_genes_name = sorted([g.name for g in m1.mandatory_genes])
        theoric_list = sorted(["sctJ_FLG", "sctN_FLG"])
        self.assertListEqual(mandatory_genes_name, theoric_list)

        self.assertEqual(len(m1.accessory_genes), 2)
        accessory_genes_name = sorted([g.name for g in m1.accessory_genes])
        theoric_list = sorted(["flgB", "flgC"])
        self.assertListEqual(accessory_genes_name, theoric_list)

        self.assertEqual(len(m1.neutral_genes), 2)
        neutral_genes_name = sorted([g.name for g in m1.neutral_genes])
        theoric_list = sorted(["fliE", "tadZ"])
        self.assertListEqual(neutral_genes_name, theoric_list)

        self.assertEqual(len(m1.forbidden_genes), 1)
        forbidden_genes_name = sorted([g.name for g in m1.forbidden_genes])
        theoric_list = sorted(["sctC"])
        self.assertListEqual(forbidden_genes_name, theoric_list)

        sctJ_FLG = m1.get_gene('sctJ_FLG')
        sctJ_FLG_exchangeables = sctJ_FLG.exchangeables
        self.assertEqual(len(sctJ_FLG_exchangeables), 2)
        self.assertEqual(sctJ_FLG_exchangeables[0].name, 'sctJ')
        self.assertEqual(sctJ_FLG_exchangeables[1].name, 'abc')
        self.assertIsInstance(sctJ_FLG_exchangeables[0], Exchangeable)
        self.assertIsInstance(sctJ_FLG_exchangeables[0]._gene, CoreGene)
        self.assertIsInstance(sctJ_FLG_exchangeables[0].alternate_of(), ModelGene)
        self.assertTrue(sctJ_FLG_exchangeables[0].loner)
        self.assertFalse(sctJ_FLG.is_exchangeable)
        sctJ = m1.get_gene('sctJ')
        self.assertTrue(sctJ.is_exchangeable)

    def test_exchangeable_inheritance(self):
        models_2_detect = [self.model_registry['foo'].get_definition('foo/model_1')]
        self.parser.parse(models_2_detect)
        m1 = self.model_bank['foo/model_1']

        sctJ = m1.get_gene('sctJ')
        self.assertTrue(sctJ.is_exchangeable)
        self.assertTrue(sctJ.loner)
        self.assertTrue(sctJ.multi_system)
        self.assertFalse(sctJ.multi_model)

        sctJ_FLG = m1.get_gene('sctJ_FLG')
        self.assertTrue(sctJ_FLG.multi_system)
        abc = m1.get_gene('abc')
        self.assertFalse(abc.multi_system)

        sctN = m1.get_gene('sctN')
        sctN_FLG = m1.get_gene('sctN_FLG')
        self.assertFalse(sctN_FLG.loner)
        self.assertTrue(sctN.loner)
        self.assertIsNone(sctN_FLG.inter_gene_max_space)
        self.assertEqual(sctN.inter_gene_max_space, 10)
        self.assertFalse(sctN_FLG.multi_model)
        self.assertFalse(sctN.multi_model)

        gspD = m1.get_gene('gspD')
        self.assertFalse(sctN_FLG.multi_system)
        self.assertTrue(gspD.multi_model)
        self.assertTrue(gspD.multi_system)

    def test_model_w_unkown_attr(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/model_w_unknown_attribute')]
        with self.assertRaises(MacsypyError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "unable to parse model definition 'foo/model_w_unknown_attribute' : "
                         "The model definition model_w_unknown_attribute.xml has an unknow attribute 'multi-loci'. "
                         "Please fix the definition.")

    def test_gene_w_unkown_attr(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/gene_w_unknown_attribute')]
        with self.assertRaises(MacsypyError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "unable to parse model definition 'foo/gene_w_unknown_attribute' : "
                         "The model definition gene_w_unknown_attribute.xml has an unknown attribute 'multi-system' for a gene."
                         " Please fix the definition.")

    def test_wo_presence(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/fail_wo_presence')]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "Invalid model definition 'foo/fail_wo_presence': gene 'sctN_FLG' without presence")

    def test_invalid_presence(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/fail_invalid_presence')]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "Invalid model 'fail_invalid_presence' definition: presence value must be either: "
                         "'mandatory', 'accessory', 'neutral', 'forbidden' not foo_bar")

    def test_gene_no_name(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/gene_no_name')]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "Invalid model definition 'foo/gene_no_name': gene without name")

    def test_invalid_homolog(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/invalid_homolog')]
        with self.assertRaises(MacsypyError) as context:
            self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "'foo/foo_bar': No such profile")

    def test_invalid_homolog_2(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/invalid_homolog_2')]
        with self.assertRaises(SyntaxError) as ctx:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(str(ctx.exception),
                         "Invalid model definition 'foo/invalid_homolog_2': gene without name")

    def test_bad_min_genes_required(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_min_genes_required')]
        with self.assertRaises(ModelInconsistencyError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         'model \'bad_min_genes_required\' is not consistent: min_genes_required 16 must be lesser '
                         'or equal than the number of "accessory" and "mandatory" components in the model: 6')

    def test_bad_min_genes_required_2(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_min_genes_required_2')]
        with self.catch_log():
            with self.assertRaisesRegex(SyntaxError,
                                        "Invalid model definition (.*): "
                                        "min_genes_required must be an integer: 16.5"):
                self.parser.parse(model_2_detect)

    def test_bad_min_mandatory_genes_required(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_min_mandatory_genes_required')]
        with self.catch_log():
            with self.assertRaises(ModelInconsistencyError) as context:
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         'model \'bad_min_mandatory_genes_required\' is not consistent: min_genes_required 16 must '
                         'be lesser or equal than the number of "accessory" and "mandatory" components in the model: 6')

    def test_bad_min_mandatory_genes_required_2(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_min_mandatory_genes_required_2')]
        with self.assertRaises(ModelInconsistencyError) as context:
            with self.catch_log():
                # error raised by System initialization
                # which occur before check_consistency
                # the last test : not(model.min_mandatory_genes_required <= model.min_genes_required)
                # seems to be useless
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "foo/bad_min_mandatory_genes_required_2: min_genes_required '6' must be greater or equal"
                         " than min_mandatory_genes_required '8'")

    def test_bad_min_mandatory_genes_required_4(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_min_mandatory_genes_required_4')]
        with self.assertRaisesRegex(SyntaxError,
                                    "Invalid model definition (.*): "
                                    "min_mandatory_genes_required must be an integer: 12.5"):
            with self.catch_log():
                self.parser.parse(model_2_detect)

    def test_min_mandatory_genes_required_lesser_than_mandatory_genes(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_min_mandatory_genes_required_3')]
        with self.assertRaises(ModelInconsistencyError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "model 'bad_min_mandatory_genes_required_3' is not consistent:"
                         " 'min_mandatory_genes_required': 6 must be lesser or equal than the number of 'mandatory' "
                         "components in the model: 5")

    def test_only_one_accessory(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/only_one_accessory')]
        with self.assertRaises(ModelInconsistencyError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "model 'only_one_accessory' is not consistent: there is only one gene in your model. "
                         "So its status should be 'mandatory'.")

    def test_bad_max_nb_genes(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/bad_max_nb_genes')]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        model_name, def_name = model_2_detect[0].split_fqn(model_2_detect[0].fqn)
        self.assertEqual(str(context.exception),
                         "Invalid model definition ({0}.xml): max_nb_genes must be an integer: HOHOHO".format(
                             os.path.join(self.cfg.models_dir()[0], model_name, 'definitions', def_name)))

    def test_bad_inter_gene_max_space(self):
        fqn = 'foo/bad_inter_gene_max_space'
        model_family, model_name = fqn.split('/')
        model_2_detect = [self.model_registry['foo'].get_definition(fqn)]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "Invalid model definition ({}): inter_gene_max_space must be an integer: 12.5".format(
                             os.path.join(self.cfg.models_dir()[0], model_family, 'definitions',
                                          model_name + ".xml")))

    def test_no_inter_gene_max_space(self):
        model_2_detect = [self.model_registry['foo'].get_definition('foo/no_inter_gene_max_space')]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "Invalid model definition ({}): inter_gene_max_space must be defined".format(
                             os.path.join(self.cfg.models_dir()[0],
                                          "foo/definitions/no_inter_gene_max_space.xml")))

    def test_loner(self):
        model_fqn = 'foo/model_5'
        model_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(model_2_detect)
        m5 = self.model_bank[model_fqn]
        m5_flgC = m5.get_gene('flgC')
        self.assertFalse(m5_flgC.loner)
        m5_tadZ = m5.get_gene('tadZ')
        self.assertTrue(m5_tadZ.loner)

        model_fqn = 'foo/model_6'
        model_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(model_2_detect)
        m6 = self.model_bank[model_fqn]
        m6_flgC = m6.get_gene('flgC')
        self.assertFalse(m6_flgC.loner)
        m6_tadZ = m6.get_gene('tadZ')
        self.assertFalse(m6_tadZ.loner)

    def test_multi_system(self):
        model_fqn = 'foo/model_5'
        model_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(model_2_detect)
        m = self.model_bank[model_fqn]
        flgC = m.get_gene('flgC')
        self.assertFalse(flgC.multi_system)
        fliE = m.get_gene('fliE')
        self.assertTrue(fliE.multi_system)

    def test_multi_model(self):
        model_fqn = 'foo/model_5'
        model_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(model_2_detect)
        m = self.model_bank[model_fqn]
        flgC = m.get_gene('flgC')
        self.assertFalse(flgC.multi_model)
        abc = m.get_gene('abc')
        self.assertTrue(abc.multi_model)

    def test_gene_inter_gene_max_space(self):
        model_fqn = ['foo/model_5', 'foo/model_6']
        models_2_detect = [self.model_registry['foo'].get_definition(fqn) for fqn in model_fqn]
        self.parser.parse(models_2_detect)

        m5 = self.model_bank['foo/model_5']
        self.assertEqual(m5.name, 'model_5')
        self.assertEqual(m5.fqn, 'foo/model_5')
        self.assertEqual(m5.inter_gene_max_space, 20)
        m5_flgB = m5.get_gene('flgB')
        m5_flgC = m5.get_gene('flgC')
        self.assertIsNone(m5_flgB.inter_gene_max_space)
        self.assertEqual(m5_flgC.inter_gene_max_space, 2)

        m6 = self.model_bank['foo/model_6']
        m6_flgC = m6.get_gene('flgC')
        self.assertEqual(m6_flgC.inter_gene_max_space, 12)

    def test_inter_gene_max_space_cfg(self):
        # test inter_gene_max_space is specified from configuration
        # so this value must overload the value read from xml
        model_fqn = 'foo/model_5'
        inter_gene_max_space_cfg = [[model_fqn, '222']]
        self.args.inter_gene_max_space = inter_gene_max_space_cfg
        self._rebuild_parser()

        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.inter_gene_max_space, 222)

    def test_min_mandatory_genes_required_cfg(self):
        # test min_mandatory_genes_required is specified from configuration
        # so this value must overload the value read from xml
        model_fqn = 'foo/model_5'
        min_mandatory_genes_required = [[model_fqn, '3']]
        self.args.min_mandatory_genes_required = min_mandatory_genes_required
        self._rebuild_parser()

        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.min_mandatory_genes_required, 3)

    def test_min_genes_required_cfg(self):
        # test min_genes_required is specified from configuration
        # so this value must overload the value read from xml
        model_fqn = 'foo/model_5'
        min_genes_required = [[model_fqn, '4']]
        self.args.min_genes_required = min_genes_required
        self._rebuild_parser()

        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.min_genes_required, 4)

    def test_max_nb_genes_cfg(self):
        # the banks and the registry are shared by the two parsers below
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.model_registry = ModelRegistry()
        models_location = scan_models_dir(self.args.models_dir)
        for ml in models_location:
            self.model_registry.add(ml)

        # max_nb_genes is specified in xml
        # no user configuration on this
        model_fqn = 'foo/model_6'  # 4 genes in this model but xml specify 3
        self.cfg = Config(MacsyDefaults(), self.args)
        self.parser = DefinitionParser(self.cfg, self.model_bank, self.gene_bank,
                                       self.model_registry, self.profile_factory)
        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.max_nb_genes, 3)

        # max_nb_genes is specified from configuration
        # so this value must overload the value read from xml
        model_fqn = 'foo/model_5'  # 4 genes in this model
        max_nb_genes = [[model_fqn, '6']]
        self.args.max_nb_genes = max_nb_genes
        self.cfg = Config(MacsyDefaults(), self.args)
        self.parser = DefinitionParser(self.cfg, self.model_bank, self.gene_bank,
                                       self.model_registry, self.profile_factory)
        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.max_nb_genes, 6)

    def test_multi_loci_cfg(self):
        # test multi_loci is specified from configuration
        # so this value must overload the value read from xml
        model_fqn = 'foo/model_5'
        self.args.multi_loci = model_fqn
        self._rebuild_parser()

        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertTrue(m.multi_loci)

    def test_bad_gene_inter_gene_max_space_2(self):
        model_fqn = 'foo/bad_inter_gene_max_space_2'
        models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
        with self.assertRaises(SyntaxError) as ctx:
            with self.catch_log():
                self.parser.parse(models_2_detect)
        self.assertEqual(str(ctx.exception),
                         "Invalid model definition 'bad_inter_gene_max_space_2': "
                         "inter_gene_max_space must be an integer: 2.5")

    def test_bad_exchangeable_inter_gene_max_space(self):
        fqn = 'foo/bad_exchangeable_inter_gene_max_space'
        model_2_detect = [self.model_registry['foo'].get_definition(fqn)]
        with self.assertRaises(SyntaxError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        self.assertEqual(str(context.exception),
                         "Invalid model definition 'bad_exchangeable_inter_gene_max_space': "
                         "inter_gene_max_space must be an integer: 1.5")

    def test_parse_model_old_syntax(self):
        # each definition uses a different flavour of the old syntax,
        # all of them must be rejected and the reason logged
        old_definitions = [
            # the attribute vers is not set
            ('model_old_1', 'is not versioned'),
            # the root is system instead of mmodel
            ('model_old_2', 'is obsolete'),
            # there still system_ref attribute
            ('model_old_3', 'is obsolete'),
            # there still homologs tag
            ('model_old_4', 'is obsolete'),
            # there still analogs tag
            ('model_old_5', 'is obsolete'),
        ]
        for def_name, reason in old_definitions:
            model_fqn = f'foo/{def_name}'
            with self.subTest(model_fqn=model_fqn):
                models_2_detect = [self.model_registry['foo'].get_definition(model_fqn)]
                with self.catch_log(log_name='macsypy') as log:
                    with self.assertRaises(MacsypyError):
                        self.parser.parse(models_2_detect)
                    # the log must be read before leaving the catch_log context
                    log_msg = log.get_value().strip()
                self.assertEqual(log_msg,
                                 f"unable to parse model definition '{model_fqn}' : "
                                 f"The model definition {def_name}.xml {reason}. Please update your model.")
class SerializationTest(MacsyTest): def setUp(self) -> None: args = argparse.Namespace() args.sequence_db = self.find_data("base", "test_1.fasta") args.db_type = 'gembase' args.models_dir = self.find_data('models') self.cfg = Config(MacsyDefaults(), args) self.model_name = 'foo' self.model_location = ModelLocation( path=os.path.join(args.models_dir, self.model_name)) self.profile_factory = ProfileFactory(self.cfg) self.hit_weights = HitWeight(**self.cfg.hit_weights()) # reset the uniq id number for AbstractSetOfHits # to have predictable results AbstractSetOfHits._id = itertools.count(1) def test_SystemSerializer_str(self): model_name = 'foo' model_location = ModelLocation( path=os.path.join(self.cfg.models_dir(), model_name)) model_A = Model("foo/A", 10) model_B = Model("foo/B", 10) c_gene_sctn_flg = CoreGene(model_location, "sctN_FLG", self.profile_factory) gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B) c_gene_sctj_flg = CoreGene(model_location, "sctJ_FLG", self.profile_factory) gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B) c_gene_flgB = CoreGene(model_location, "flgB", self.profile_factory) c_gene_tadZ = CoreGene(model_location, "tadZ", self.profile_factory) gene_tadZ = ModelGene(c_gene_tadZ, model_B) c_gene_sctn = CoreGene(model_location, "sctN", self.profile_factory) gene_sctn = ModelGene(c_gene_sctn, model_A) gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn) gene_sctn.add_exchangeable(gene_sctn_hom) c_gene_sctj = CoreGene(model_location, "sctJ", self.profile_factory) gene_sctj = ModelGene(c_gene_sctj, model_A) gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj) gene_sctj.add_exchangeable(gene_sctj_an) c_gene_gspd = CoreGene(model_location, "gspD", self.profile_factory) gene_gspd = ModelGene(c_gene_gspd, model_A) gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd) gene_gspd.add_exchangeable(gene_gspd_an) c_gene_abc = CoreGene(model_location, "abc", self.profile_factory) gene_abc = ModelGene(c_gene_abc, model_A) gene_abc_ho = 
Exchangeable(c_gene_tadZ, gene_abc) gene_abc.add_exchangeable(gene_abc_ho) model_A.add_mandatory_gene(gene_sctn) model_A.add_mandatory_gene(gene_sctj) model_A.add_accessory_gene(gene_gspd) model_A.add_forbidden_gene(gene_abc) model_B.add_mandatory_gene(gene_sctn_flg) model_B.add_mandatory_gene(gene_sctj_flg) model_B.add_accessory_gene(gene_gspd) model_B.add_accessory_gene(gene_tadZ) h_sctj = Hit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) h_sctn = Hit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) h_gspd = Hit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) h_sctj_flg = Hit(c_gene_sctj_flg, "hit_sctj_flg", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) h_tadZ = Hit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) model_A._min_mandatory_genes_required = 2 model_A._min_genes_required = 2 c1 = Cluster([ ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY), ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY), ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY) ], model_A, self.hit_weights) c2 = Cluster([ ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY), ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY) ], model_A, self.hit_weights) model_B._min_mandatory_genes_required = 1 model_B._min_genes_required = 2 c3 = Cluster([ ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY), ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY), ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY) ], model_B, self.hit_weights) sys_A = System(model_A, [c1, c2], self.cfg.redundancy_penalty()) sys_A.id = "sys_id_A" sys_B = System(model_B, [c3], self.cfg.redundancy_penalty()) sys_B.id = "sys_id_B" hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B]) system_serializer = TxtSystemSerializer() sys_str = f"""system id = {sys_A.id} model = foo/A replicon = replicon_id clusters = [('hit_sctj', 'sctJ', 1), ('hit_sctn', 'sctN', 1), ('hit_gspd', 'gspD', 1)], [('hit_sctj', 'sctJ', 1), 
('hit_sctn', 'sctN', 1)] occ = 2 wholeness = 1.000 loci nb = 2 score = 1.500 mandatory genes: \t- sctN: 2 (sctN, sctN) \t- sctJ: 2 (sctJ, sctJ) accessory genes: \t- gspD: 1 (gspD [sys_id_B]) neutral genes: """ self.assertEqual( sys_str, system_serializer.serialize(sys_A, hit_multi_sys_tracker)) def test_SystemSerializer_tsv(self): model = Model("foo/T2SS", 10) c_gene_gspd = CoreGene(self.model_location, "gspD", self.profile_factory) gene_gspd = ModelGene(c_gene_gspd, model) model.add_mandatory_gene(gene_gspd) c_gene_sctj = CoreGene(self.model_location, "sctJ", self.profile_factory) gene_sctj = ModelGene(c_gene_sctj, model) model.add_accessory_gene(gene_sctj) c_gene_sctn = CoreGene(self.model_location, "sctN", self.profile_factory) gene_sctn = ModelGene(c_gene_sctn, model) c_gene_sctn_flg = CoreGene(self.model_location, "sctN_FLG", self.profile_factory) gene_sctn_flg = Exchangeable(c_gene_sctn_flg, gene_sctn) gene_sctn.add_exchangeable(gene_sctn_flg) model.add_accessory_gene(gene_sctn) h_gspd = Hit(c_gene_gspd, "h_gspd", 803, "replicon_id", 10, 1.0, 1.0, 1.0, 1.0, 10, 20) v_h_gspd = ValidHit(h_gspd, gene_gspd, GeneStatus.MANDATORY) h_sctj = Hit(c_gene_sctj, "h_sctj", 803, "replicon_id", 20, 1.0, 1.0, 1.0, 1.0, 20, 30) v_h_sctj = ValidHit(h_sctj, gene_sctj, GeneStatus.ACCESSORY) h_sctn_flg = Hit(c_gene_sctn_flg, "h_sctn_flg", 803, "replicon_id", 30, 1.0, 1.0, 1.0, 1.0, 30, 40) v_h_sctn_flg = ValidHit(h_sctn_flg, gene_sctn_flg, GeneStatus.ACCESSORY) c1 = Cluster([v_h_gspd, v_h_sctj], model, self.hit_weights) c2 = Cluster([v_h_sctn_flg], model, self.hit_weights) sys_multi_loci = System(model, [c1, c2], self.cfg.redundancy_penalty()) hit_multi_sys_tracker = HitSystemTracker([sys_multi_loci]) system_serializer = TsvSystemSerializer() sys_tsv = "\t".join([ "replicon_id", "h_gspd", "gspD", "10", "foo/T2SS", sys_multi_loci.id, "1", "1.000", "1.900", "1", "gspD", "mandatory", "803", "1.0", "1.000", "1.000", "1.000", "10", "20", "" ]) sys_tsv += "\n" sys_tsv += "\t".join([ 
"replicon_id", "h_sctj", "sctJ", "20", "foo/T2SS", sys_multi_loci.id, "1", "1.000", "1.900", "1", "sctJ", "accessory", "803", "1.0", "1.000", "1.000", "1.000", "20", "30", "" ]) sys_tsv += "\n" sys_tsv += "\t".join([ "replicon_id", "h_sctn_flg", "sctN_FLG", "30", "foo/T2SS", sys_multi_loci.id, "1", "1.000", "1.900", "1", "sctN", "accessory", "803", "1.0", "1.000", "1.000", "1.000", "30", "40", "" ]) sys_tsv += "\n" self.assertEqual( sys_tsv, system_serializer.serialize(sys_multi_loci, hit_multi_sys_tracker)) def test_SolutionSerializer_tsv(self): model_name = 'foo' model_location = ModelLocation( path=os.path.join(self.cfg.models_dir(), model_name)) model_A = Model("foo/A", 10) model_B = Model("foo/B", 10) c_gene_sctn_flg = CoreGene(model_location, "sctN_FLG", self.profile_factory) gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B) c_gene_sctj_flg = CoreGene(model_location, "sctJ_FLG", self.profile_factory) gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B) c_gene_flgB = CoreGene(model_location, "flgB", self.profile_factory) gene_flgB = ModelGene(c_gene_flgB, model_B) c_gene_tadZ = CoreGene(model_location, "tadZ", self.profile_factory) gene_tadZ = ModelGene(c_gene_tadZ, model_B) c_gene_sctn = CoreGene(model_location, "sctN", self.profile_factory) gene_sctn = ModelGene(c_gene_sctn, model_A) gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn) gene_sctn.add_exchangeable(gene_sctn_hom) c_gene_sctj = CoreGene(model_location, "sctJ", self.profile_factory) gene_sctj = ModelGene(c_gene_sctj, model_A) gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj) gene_sctj.add_exchangeable(gene_sctj_an) c_gene_gspd = CoreGene(model_location, "gspD", self.profile_factory) gene_gspd = ModelGene(c_gene_gspd, model_A) gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd) gene_gspd.add_exchangeable(gene_gspd_an) c_gene_abc = CoreGene(model_location, "abc", self.profile_factory) gene_abc = ModelGene(c_gene_abc, model_A) gene_abc_ho = Exchangeable(c_gene_tadZ, gene_abc) 
gene_abc.add_exchangeable(gene_abc_ho) model_A.add_mandatory_gene(gene_sctn) model_A.add_mandatory_gene(gene_sctj) model_A.add_accessory_gene(gene_gspd) model_A.add_forbidden_gene(gene_abc) model_B.add_mandatory_gene(gene_sctn_flg) model_B.add_mandatory_gene(gene_sctj_flg) model_B.add_accessory_gene(gene_flgB) model_B.add_accessory_gene(gene_tadZ) h_sctj = Hit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) h_sctn = Hit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) h_gspd = Hit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) h_sctj_flg = Hit(c_gene_sctj_flg, "hit_sctj_flg", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) h_flgB = Hit(c_gene_flgB, "hit_flgB", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) h_tadZ = Hit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) model_A._min_mandatory_genes_required = 2 model_A._min_genes_required = 2 c1 = Cluster([ ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY), ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY), ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY) ], model_A, self.hit_weights) c2 = Cluster([ ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY), ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY) ], model_A, self.hit_weights) model_B._min_mandatory_genes_required = 1 model_B._min_genes_required = 2 c3 = Cluster([ ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY), ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY), ValidHit(h_flgB, gene_flgB, GeneStatus.ACCESSORY) ], model_B, self.hit_weights) sys_A = System(model_A, [c1, c2], self.cfg.redundancy_penalty()) sys_A.id = "sys_id_A" sys_B = System(model_B, [c3], self.cfg.redundancy_penalty()) sys_B.id = "sys_id_B" sol = [sys_A, sys_B] sol_id = '12' hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B]) system_serializer = TsvSolutionSerializer() sol_tsv = '\t'.join([ sol_id, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A', 'sys_id_A', '2', 
'1.000', '1.500', '2', 'sctJ', 'mandatory', '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '' ]) sol_tsv += "\n" sol_tsv += '\t'.join([ sol_id, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A', 'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '' ]) sol_tsv += "\n" sol_tsv += '\t'.join([ sol_id, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/A', 'sys_id_A', '2', '1.000', '1.500', '2', 'gspD', 'accessory', '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '' ]) sol_tsv += "\n" sol_tsv += '\t'.join([ sol_id, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A', 'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '' ]) sol_tsv += "\n" sol_tsv += '\t'.join([ sol_id, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A', 'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '' ]) sol_tsv += "\n" sol_tsv += "\n" sol_tsv += '\t'.join([ sol_id, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '1', 'foo/B', 'sys_id_B', '1', '0.750', '2.000', '1', 'sctJ_FLG', 'mandatory', '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '' ]) sol_tsv += "\n" sol_tsv += '\t'.join([ sol_id, 'replicon_id', 'hit_tadZ', 'tadZ', '1', 'foo/B', 'sys_id_B', '1', '0.750', '2.000', '1', 'tadZ', 'accessory', '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '' ]) sol_tsv += "\n" sol_tsv += '\t'.join([ sol_id, 'replicon_id', 'hit_flgB', 'flgB', '1', 'foo/B', 'sys_id_B', '1', '0.750', '2.000', '1', 'flgB', 'accessory', '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '' ]) sol_tsv += "\n" sol_tsv += "\n" ser = system_serializer.serialize(sol, sol_id, hit_multi_sys_tracker) self.assertEqual(ser, sol_tsv) def test_LikelySystemSerializer_txt(self): model = Model("foo/FOO", 10) c_gene_gspd = CoreGene(self.model_location, "gspD", self.profile_factory) gene_gspd = ModelGene(c_gene_gspd, model) model.add_mandatory_gene(gene_gspd) 
c_gene_sctj = CoreGene(self.model_location, "sctJ", self.profile_factory) gene_sctj = ModelGene(c_gene_sctj, model) model.add_accessory_gene(gene_sctj) c_gene_sctn = CoreGene(self.model_location, "sctN", self.profile_factory) gene_sctn = ModelGene(c_gene_sctn, model) model.add_accessory_gene(gene_sctn) c_gene_abc = CoreGene(self.model_location, "abc", self.profile_factory) gene_abc = ModelGene(c_gene_abc, model) model.add_forbidden_gene(gene_abc) hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY) hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 2, 1.0, 1.0, 1.0, 1.0, 10, 20) v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY) hit_3 = Hit(c_gene_sctn, "hit_3", 803, "replicon_id", 3, 1.0, 1.0, 1.0, 1.0, 10, 20) v_hit_3 = ValidHit(hit_3, gene_sctn, GeneStatus.ACCESSORY) hit_4 = Hit(c_gene_abc, "hit_4", 803, "replicon_id", 4, 1.0, 1.0, 1.0, 1.0, 10, 20) v_hit_4 = ValidHit(hit_4, gene_abc, GeneStatus.FORBIDDEN) ls_1 = LikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [], [v_hit_4]) hit_multi_sys_tracker = HitSystemTracker([ls_1]) ser = TxtLikelySystemSerializer() txt = ser.serialize(ls_1, hit_multi_sys_tracker) expected_txt = """This replicon contains genetic materials needed for system foo/FOO WARNING there quorum is reached but there is also some forbidden genes. system id = replicon_id_FOO_1 model = foo/FOO replicon = replicon_id hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 2), ('hit_3', 'sctN', 3), ('hit_4', 'abc', 4)] wholeness = 1.000 mandatory genes: \t- gspD: 1 (gspD) accessory genes: \t- sctJ: 1 (sctJ) \t- sctN: 1 (sctN) neutral genes: forbidden genes: \t- abc: 1 (abc) Use ordered replicon to have better prediction. 
""" self.assertEqual(txt, expected_txt) def test_UnlikelySystemSerializer_txt(self): model = Model("foo/FOO", 10) c_gene_gspd = CoreGene(self.model_location, "gspD", self.profile_factory) gene_gspd = ModelGene(c_gene_gspd, model) model.add_mandatory_gene(gene_gspd) c_gene_sctj = CoreGene(self.model_location, "sctJ", self.profile_factory) gene_sctj = ModelGene(c_gene_sctj, model) model.add_accessory_gene(gene_sctj) c_gene_sctn = CoreGene(self.model_location, "sctN", self.profile_factory) gene_sctn = ModelGene(c_gene_sctn, model) model.add_accessory_gene(gene_sctn) c_gene_abc = CoreGene(self.model_location, "abc", self.profile_factory) gene_abc = ModelGene(c_gene_abc, model) model.add_forbidden_gene(gene_abc) hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20) v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY) hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 2, 1.0, 1.0, 1.0, 1.0, 10, 20) v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY) hit_3 = Hit(c_gene_sctn, "hit_3", 803, "replicon_id", 3, 1.0, 1.0, 1.0, 1.0, 10, 20) v_hit_3 = ValidHit(hit_3, gene_sctn, GeneStatus.ACCESSORY) hit_4 = Hit(c_gene_abc, "hit_4", 803, "replicon_id", 4, 1.0, 1.0, 1.0, 1.0, 10, 20) v_hit_4 = ValidHit(hit_4, gene_abc, GeneStatus.FORBIDDEN) ser = TxtUnikelySystemSerializer() ls_1 = UnlikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [], [v_hit_4], ["the reason why"]) txt = ser.serialize(ls_1) expected_txt = """This replicon probably not contains a system foo/FOO: the reason why system id = replicon_id_FOO_1 model = foo/FOO replicon = replicon_id hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 2), ('hit_3', 'sctN', 3), ('hit_4', 'abc', 4)] wholeness = 1.000 mandatory genes: \t- gspD: 1 (gspD) accessory genes: \t- sctJ: 1 (sctJ) \t- sctN: 1 (sctN) neutral genes: forbidden genes: \t- abc: 1 (abc) Use ordered replicon to have better prediction. """ self.assertEqual(txt, expected_txt)
class ModelRegistryTest(MacsyTest):
    """
    Tests for ``scan_models_dir``, ``ModelLocation`` and ``ModelRegistry``
    run against a fake models tree created in a temporary directory.
    """

    def setUp(self):
        # Build <tmp>/macsyfinder/models and point the registries module at the temporary prefix.
        self.tmp_dir = tempfile.mkdtemp()
        registries._prefix_data = self.tmp_dir
        self.root_models_dir = os.path.join(self.tmp_dir, 'macsyfinder', 'models')
        self.cfg = Config(MacsyDefaults(models_dir=self.root_models_dir), argparse.Namespace())
        os.makedirs(self.root_models_dir)
        # A model family whose definitions sit directly at the top level.
        simple_models = {
            'name': 'simple',
            'profiles': ('prof_1.hmm', 'prof_2.hmm'),
            'not_profiles': ('not_a_profile', ),
            'definitions': {
                'def_1.xml': None,
                'def_2.xml': None
            },
            'not_definitions': {
                'not_a_def': None
            },
        }
        # A model family whose definitions are grouped in sub-directories.
        complex_models = {
            'name': 'complex',
            'profiles': ('prof_1.hmm', 'prof_2.hmm'),
            'not_profiles': ('not_a_profile', ),
            'definitions': {
                'subdef_1': {
                    'def_1_1.xml': None,
                    'def_1_2.xml': None
                },
                'subdef_2': {
                    'def_2_1.xml': None,
                    'def_2_2.xml': None
                },
            },
            'not_definitions': {
                'subdef_1': {
                    'not_a_def': None
                },
                'subdef_2': {
                    'not_a_def': None
                }
            },
        }
        self.simple_dir = _create_fake_models_tree(self.root_models_dir, simple_models)
        self.complex_dir = _create_fake_models_tree(self.root_models_dir, complex_models)

    def tearDown(self):
        # Best-effort cleanup: a failure to remove a directory must not fail the test.
        try:
            # working_dir is an accessor method of Config, it has to be *called*;
            # passing the bound method itself to rmtree raised a TypeError that the
            # former bare ``except`` silently swallowed, so nothing was ever removed.
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            pass
        shutil.rmtree(self.tmp_dir, ignore_errors=True)

    def test_scan_models_dir(self):
        # scan_models_dir must find one ModelLocation per fake model family.
        models_location = scan_models_dir(self.cfg.models_dir())
        models_location_expected = [
            ModelLocation(path=self.simple_dir, profile_suffix='.hmm', relative_path=False),
            ModelLocation(path=self.complex_dir, profile_suffix='.hmm', relative_path=False),
        ]
        self.assertListEqual(sorted(models_location_expected), sorted(models_location))

    def test_add_get(self):
        # Looking up an unknown name raises KeyError; after add() the location is returned.
        mr = ModelRegistry()
        model_complex_expected = ModelLocation(path=self.complex_dir)
        with self.assertRaises(KeyError) as ctx:
            mr[model_complex_expected.name]
        self.assertEqual(str(ctx.exception), '"No such model definition: \'complex\'"')
        mr.add(model_complex_expected)
        self.assertEqual(model_complex_expected, mr[model_complex_expected.name])

    def test_models(self):
        # models() gives back every location that was added (order is not checked).
        mr = ModelRegistry()
        model_complex_expected = ModelLocation(path=self.complex_dir)
        model_simple_expected = ModelLocation(path=self.simple_dir)
        mr.add(model_complex_expected)
        mr.add(model_simple_expected)
        models_received = sorted(mr.models())
        models_expected = sorted(
            [model_complex_expected, model_simple_expected])
        self.assertListEqual(models_received, models_expected)

    def test_str(self):
        # str(registry) lists each model family followed by its (sub)definitions.
        mr = ModelRegistry()
        model_complex_expected = ModelLocation(path=self.complex_dir)
        model_simple_expected = ModelLocation(path=self.simple_dir)
        mr.add(model_complex_expected)
        mr.add(model_simple_expected)
        # NOTE(review): the literal below is kept exactly as it appears in this file
        # (single line); confirm its whitespace against the real str(ModelRegistry) layout.
        expected_output = """complex /subdef_1 /def_1_1 /def_1_2 /subdef_2 /def_2_1 /def_2_2 simple /def_1 /def_2 """
        self.assertEqual(expected_output, str(mr))
def do_install(args: argparse.Namespace) -> None:
    """
    Install new models in macsyfinder local models repository.

    The package is taken from a local archive when ``args.package`` is an existing path,
    otherwise it is treated as a requirement string and looked up remotely.
    Nothing is installed (a warning is logged and None is returned) when the requirement
    is already satisfied, when it would need ``--upgrade`` or ``--force`` which
    were not given, or when no version satisfies the requirement.

    :param args: the arguments passed on the command line
    :type args: :class:`argparse.Namespace` object
    :rtype: None
    :raise RuntimeError: if the metadata of the package already installed cannot be read,
                         or if the user models location exists but is not a directory.
    :raise ValueError: if the available remote versions cannot be retrieved,
                       or if the destination is not writable.
    """
    if os.path.exists(args.package):
        # local archive: the name and version come from the archive file name
        remote = False
        pack_name, inst_vers = parse_arch_path(args.package)
        user_req = requirements.Requirement(f"{pack_name}=={inst_vers}")
    else:
        remote = True
        user_req = requirements.Requirement(args.package)

    pack_name = user_req.name
    inst_pack_loc = _find_installed_package(pack_name)

    if inst_pack_loc:
        pack = Package(inst_pack_loc.path)
        try:
            local_vers = version.Version(pack.metadata['vers'])
        except FileNotFoundError:
            _log.error(f"{pack_name} locally installed is corrupted.")
            _log.warning(f"You can fix it by removing '{inst_pack_loc.path}'.")
            sys.tracebacklimit = 0
            raise RuntimeError() from None
    else:
        local_vers = None

    user_specifier = user_req.specifier
    if not user_specifier and inst_pack_loc:
        # the user do not request for a specific version
        # and there already a version installed locally
        user_specifier = specifiers.SpecifierSet(f">{local_vers}")

    if remote:
        try:
            all_available_versions = _get_remote_available_versions(pack_name, args.org)
        except (ValueError, MacsyDataLimitError) as err:
            _log.error(str(err))
            sys.tracebacklimit = 0
            raise ValueError from None
    else:
        all_available_versions = [inst_vers]

    compatible_version = list(user_specifier.filter(all_available_versions))
    # The extra test on all_available_versions avoids an IndexError on an empty list;
    # in that case we fall through to the "could not find version" warning below.
    if not compatible_version and local_vers and all_available_versions:
        target_vers = version.Version(all_available_versions[0])
        if target_vers == local_vers and not args.force:
            _log.warning(f"Requirement already satisfied: {pack_name}{user_specifier} in {pack.path}.\n"
                         f"To force installation use option -f --force-reinstall.")
            return None
        elif target_vers < local_vers and not args.force:
            _log.warning(f"{pack_name} ({local_vers}) is already installed.\n"
                         f"To downgrade to {target_vers} use option -f --force-reinstall.")
            return None
        # otherwise:
        #   target_vers == local_vers and args.force
        #   target_vers < local_vers and args.force
        # => go on with the installation
    elif not compatible_version:
        # No compatible version and not local version
        _log.warning(f"Could not find version that satisfied '{pack_name}{user_specifier}'")
        return None
    else:
        # it exists at least one compatible version
        target_vers = version.Version(compatible_version[0])
        if inst_pack_loc:
            if target_vers > local_vers and not args.upgrade:
                _log.warning(f"{pack_name} ({local_vers}) is already installed but {target_vers} version is available.\n"
                             f"To install it please run 'macsydata install --upgrade {pack_name}'")
                return None
            elif target_vers == local_vers and not args.force:
                _log.warning(f"Requirement already satisfied: {pack_name}{user_specifier} in {pack.path}.\n"
                             f"To force installation use option -f --force-reinstall.")
                return None
            # otherwise:
            #   target_vers > local_vers and args.upgrade
            # => I have to install a new package

    # if i'm here it's mean I have to install a new package
    if remote:
        _log.info(f"Downloading {pack_name} ({target_vers}).")
        model_index = RemoteModelIndex(org=args.org, cache=args.cache)
        _log.debug(f"call download with pack_name={pack_name}, vers={target_vers}")
        arch_path = model_index.download(pack_name, str(target_vers))
    else:
        model_index = LocalModelIndex(cache=args.cache)
        arch_path = args.package

    _log.info(f"Extracting {pack_name} ({target_vers}).")
    cached_pack = model_index.unarchive_package(arch_path)

    if args.user:
        dest = os.path.realpath(os.path.join(os.path.expanduser('~'), '.macsyfinder', 'data'))
        if os.path.exists(dest) and not os.path.isdir(dest):
            # the path was missing from the message: the '{}' placeholder was never filled
            raise RuntimeError(f"'{dest}' already exist and is not a directory.")
        os.makedirs(dest, exist_ok=True)
    else:
        defaults = MacsyDefaults()
        config = Config(defaults, argparse.Namespace())
        dest = config.models_dir()

    if inst_pack_loc:
        # keep the previous installation aside until the new one is in place
        old_pack_path = f"{inst_pack_loc.path}.old"
        shutil.move(inst_pack_loc.path, old_pack_path)

    _log.info(f"Installing {pack_name} ({target_vers}) in {dest}")
    try:
        shutil.move(cached_pack, dest)
    except PermissionError as err:
        _log.error(f"{dest} is not writable: {err}")
        _log.warning("Maybe you can use --user option to install in your HOME.")
        sys.tracebacklimit = 0
        raise ValueError() from None

    _log.info("Cleaning.")
    shutil.rmtree(pathlib.Path(cached_pack).parent)
    if inst_pack_loc:
        shutil.rmtree(old_pack_path)
    _log.info(f"The models {pack_name} ({target_vers}) have been installed successfully.")
def test_models_dir(self):
    # A models_dir set on the parsed arguments must be reported by the Config
    # as a list holding that single directory.
    expected_dirs = [self.tmp_dir]
    self.parsed_args.models_dir = self.tmp_dir
    config = Config(self.defaults, self.parsed_args)
    observed_dirs = config.models_dir()
    self.assertEqual(observed_dirs, expected_dirs)
class SerializationTest(MacsyTest):
    # Tests for the text / tsv serializers of systems, solutions, likely and unlikely
    # systems and special hits. Every test builds its models, genes and hits by hand
    # and compares the serializer output with a hard-coded expected string.
    # NOTE(review): the multi-line expected literals below are kept exactly as they
    # appear in this file; their whitespace should be confirmed against the serializers.

    def setUp(self) -> None:
        # Shared fixtures: a Config built on the test data, the 'foo' model location,
        # a profile factory and the hit weights taken from the configuration.
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        self.cfg = Config(MacsyDefaults(), args)
        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)
        self.hit_weights = HitWeight(**self.cfg.hit_weights())
        # reset the uniq id number for AbstractUnordered
        # to have predictable results for (Likely/Unlikely)Systems
        System._id = itertools.count(1)
        AbstractUnordered._id = itertools.count(1)

    def test_SystemSerializer_str(self):
        # Serialize sys_A (2 clusters of model foo/A) as text; gspD is also used by sys_B,
        # which is what the "[sys_id_B]" marker in the expected string refers to.
        model_name = 'foo'
        model_location = ModelLocation(
            path=os.path.join(self.cfg.models_dir()[0], model_name))
        model_A = Model("foo/A", 10)
        model_B = Model("foo/B", 10)
        c_gene_sctn_flg = CoreGene(model_location, "sctN_FLG", self.profile_factory)
        gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B)
        c_gene_sctj_flg = CoreGene(model_location, "sctJ_FLG", self.profile_factory)
        gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B)
        c_gene_flgB = CoreGene(model_location, "flgB", self.profile_factory)
        c_gene_tadZ = CoreGene(model_location, "tadZ", self.profile_factory)
        gene_tadZ = ModelGene(c_gene_tadZ, model_B)
        c_gene_sctn = CoreGene(model_location, "sctN", self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model_A)
        gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_hom)
        c_gene_sctj = CoreGene(model_location, "sctJ", self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model_A)
        gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj)
        gene_sctj.add_exchangeable(gene_sctj_an)
        c_gene_gspd = CoreGene(model_location, "gspD", self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model_A)
        gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd)
        gene_gspd.add_exchangeable(gene_gspd_an)
        c_gene_abc = CoreGene(model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model_A)
        gene_abc_ho = Exchangeable(c_gene_tadZ, gene_abc)
        gene_abc.add_exchangeable(gene_abc_ho)
        model_A.add_mandatory_gene(gene_sctn)
        model_A.add_mandatory_gene(gene_sctj)
        model_A.add_accessory_gene(gene_gspd)
        model_A.add_forbidden_gene(gene_abc)
        model_B.add_mandatory_gene(gene_sctn_flg)
        model_B.add_mandatory_gene(gene_sctj_flg)
        model_B.add_accessory_gene(gene_gspd)
        model_B.add_accessory_gene(gene_tadZ)
        h_sctj = CoreHit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_sctn = CoreHit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_gspd = CoreHit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_sctj_flg = CoreHit(c_gene_sctj_flg, "hit_sctj_flg", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_tadZ = CoreHit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        c1 = Cluster([
            ModelHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ModelHit(h_sctn, gene_sctn, GeneStatus.MANDATORY),
            ModelHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_A, self.hit_weights)
        c2 = Cluster([
            ModelHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ModelHit(h_sctn, gene_sctn, GeneStatus.MANDATORY)
        ], model_A, self.hit_weights)
        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c3 = Cluster([
            ModelHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
            ModelHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
            ModelHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_B, self.hit_weights)
        sys_A = System(model_A, [c1, c2], self.cfg.redundancy_penalty())
        # ids are forced so that the expected string does not depend on the id counter
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c3], self.cfg.redundancy_penalty())
        sys_B.id = "sys_id_B"
        hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B])
        system_serializer = TxtSystemSerializer()
        sys_str = f"""system id = {sys_A.id} model = foo/A replicon = replicon_id clusters = [('hit_sctj', 'sctJ', 1), 
('hit_sctn', 'sctN', 1), ('hit_gspd', 'gspD', 1)], [('hit_sctj', 'sctJ', 1), ('hit_sctn', 'sctN', 1)] occ = 2 wholeness = 1.000 loci nb = 2 score = 1.500 mandatory genes: \t- sctN: 2 (sctN, sctN) \t- sctJ: 2 (sctJ, sctJ) accessory genes: \t- gspD: 1 (gspD [sys_id_B]) neutral genes: """
        self.assertEqual(
            sys_str, system_serializer.serialize(sys_A, hit_multi_sys_tracker))

    def test_SystemSerializer_tsv(self):
        # Serialize a multi-loci system as tsv: one regular cluster (gspD, sctJ)
        # plus one cluster made of a loner (sctN) which has a counterpart hit.
        model = Model("foo/T2SS", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD", self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ", self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN", self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model, loner=True)
        c_gene_sctn_flg = CoreGene(self.model_location, "sctN_FLG", self.profile_factory)
        gene_sctn_flg = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_flg)
        model.add_accessory_gene(gene_sctn)
        # CoreHit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match
        # In the calls below the 5th argument is the position and the 7th the score.
        ch_gspd = CoreHit(c_gene_gspd, "h_gspd", 803, "replicon_id", 10, 1.0, 1.0, 1.0, 1.0, 10, 20)
        mh_gspd = ModelHit(ch_gspd, gene_ref=gene_gspd, gene_status=GeneStatus.MANDATORY)
        ch_sctj = CoreHit(c_gene_sctj, "h_sctj", 803, "replicon_id", 20, 1.0, 1.0, 1.0, 1.0, 20, 30)
        mh_sctj = ModelHit(ch_sctj, gene_ref=gene_sctj, gene_status=GeneStatus.ACCESSORY)
        ch_sctn_flg = CoreHit(c_gene_sctn_flg, "h_sctn_flg", 803, "replicon_id", 40, 1.0, 1.0, 1.0, 1.0, 30, 40)
        mh_sctn_flg = ModelHit(ch_sctn_flg, gene_ref=gene_sctn_flg, gene_status=GeneStatus.ACCESSORY)
        ch_sctn = CoreHit(c_gene_sctn, "h_sctn", 803, "replicon_id", 80, 1.0, 1.0, 1.0, 1.0, 30, 40)
        mh_sctn = Loner(ch_sctn, gene_ref=gene_sctn, gene_status=GeneStatus.ACCESSORY, counterpart=[mh_sctn_flg])
        c1 = Cluster([mh_gspd, mh_sctj], model, self.hit_weights)
        c2 = Cluster([mh_sctn], model, self.hit_weights)
        sys_multi_loci = System(model, [c1, c2], self.cfg.redundancy_penalty())  # score 1.5 + .35 = 1.85
        hit_multi_sys_tracker = HitSystemTracker([sys_multi_loci])
        system_serializer = TsvSystemSerializer()
        # One expected tsv row per hit; the loner row has -1 in the locus column
        # and lists its counterpart (h_sctn_flg) in the second to last column.
        sys_tsv = "\t".join([
            "replicon_id", "h_gspd", "gspD", "10", "foo/T2SS", sys_multi_loci.id,
            "1", "1", "1.000", "1.850", "1", "gspD", "mandatory", "803", "1.0",
            "1.000", "1.000", "1.000", "10", "20", "", ""
        ])
        sys_tsv += "\n"
        sys_tsv += "\t".join([
            "replicon_id", "h_sctj", "sctJ", "20", "foo/T2SS", sys_multi_loci.id,
            "1", "1", "1.000", "1.850", "1", "sctJ", "accessory", "803", "1.0",
            "1.000", "1.000", "1.000", "20", "30", "", ""
        ])
        sys_tsv += "\n"
        sys_tsv += "\t".join([
            "replicon_id", "h_sctn", "sctN", "80", "foo/T2SS", sys_multi_loci.id,
            "1", "-1", "1.000", "1.850", "1", "sctN", "accessory", "803", "1.0",
            "1.000", "1.000", "1.000", "30", "40", "h_sctn_flg", ""
        ])
        sys_tsv += "\n"
        # show the full diff when the comparison fails
        self.maxDiff = None
        self.assertEqual(
            sys_tsv,
            system_serializer.serialize(sys_multi_loci, hit_multi_sys_tracker))

    def test_SolutionSerializer_tsv(self):
        # Serialize a Solution made of two systems (sys_A on model foo/A, sys_B on
        # model foo/B) as tsv; each system block is followed by an empty line.
        model_name = 'foo'
        model_location = ModelLocation(
            path=os.path.join(self.cfg.models_dir()[0], model_name))
        ###########
        # Model B #
        ###########
        model_B = Model("foo/B", 10)
        c_gene_sctn_flg = CoreGene(model_location, "sctN_FLG", self.profile_factory)
        gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B)
        c_gene_sctj_flg = CoreGene(model_location, "sctJ_FLG", self.profile_factory)
        gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B)
        c_gene_flgB = CoreGene(model_location, "flgB", self.profile_factory)
        gene_flgB = ModelGene(c_gene_flgB, model_B)
        c_gene_tadZ = CoreGene(model_location, "tadZ", self.profile_factory)
        gene_tadZ = ModelGene(c_gene_tadZ, model_B)
        model_B.add_mandatory_gene(gene_sctn_flg)
        model_B.add_mandatory_gene(gene_sctj_flg)
        model_B.add_accessory_gene(gene_flgB)
        model_B.add_accessory_gene(gene_tadZ)
        ###########
        # Model A #
        ###########
        model_A = Model("foo/A", 10)
        c_gene_sctn = CoreGene(model_location, "sctN", self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model_A)
        gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_hom)
        c_gene_sctj = CoreGene(model_location, "sctJ", self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model_A)
        gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj)
        gene_sctj.add_exchangeable(gene_sctj_an)
        c_gene_gspd = CoreGene(model_location, "gspD", self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model_A)
        gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd)
        gene_gspd.add_exchangeable(gene_gspd_an)
        c_gene_abc = CoreGene(model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model_A, loner=True)
        gene_abc_ho = Exchangeable(c_gene_tadZ, gene_abc)
        gene_abc.add_exchangeable(gene_abc_ho)
        model_A.add_mandatory_gene(gene_sctn)
        model_A.add_mandatory_gene(gene_sctj)
        model_A.add_accessory_gene(gene_gspd)
        model_A.add_accessory_gene(gene_abc)
        # CoreHit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match
        # In the calls below the 5th argument is the position and the 7th the score.
        h_sctj = CoreHit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        mh_sctj = ModelHit(h_sctj, gene_sctj, GeneStatus.MANDATORY)
        h_sctn = CoreHit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 2, 1.0, 1.0, 1.0, 1.0, 10, 20)
        mh_sctn = ModelHit(h_sctn, gene_sctn, GeneStatus.MANDATORY)
        h_gspd = CoreHit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 3, 1.0, 1.0, 1.0, 1.0, 10, 20)
        mh_gspd = ModelHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        h_sctj_flg = CoreHit(c_gene_sctj_flg, "hit_sctj_flg", 803, "replicon_id", 10, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_flgB = CoreHit(c_gene_flgB, "hit_flgB", 803, "replicon_id", 11, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_abc = CoreHit(c_gene_abc, "hit_abc", 803, "replicon_id", 20, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_abc2 = CoreHit(c_gene_abc, "hit_abc2", 803, "replicon_id", 50, 1.0, 1.0,
                         1.0, 1.0, 10, 20)
        h_tadZ = CoreHit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 40, 1.0, 1.0, 1.0, 1.0, 10, 20)
        mh_sctj_flg = ModelHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY)
        mh_flgB = ModelHit(h_flgB, gene_flgB, GeneStatus.ACCESSORY)
        mh_abc = ModelHit(h_abc, gene_abc, GeneStatus.ACCESSORY)
        mh_abc2 = ModelHit(h_abc2, gene_abc, GeneStatus.ACCESSORY)
        mh_tadZ = ModelHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY)
        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        c1 = Cluster([mh_sctj, mh_sctn, mh_gspd], model_A, self.hit_weights)
        c2 = Cluster([mh_sctj, mh_sctn], model_A, self.hit_weights)
        # third cluster: a single loner hit of abc, with hit_abc2 as its counterpart
        c3 = Cluster([
            Loner(h_abc,
                  gene_ref=gene_abc,
                  gene_status=GeneStatus.ACCESSORY,
                  counterpart=[mh_abc2])
        ], model_A, self.hit_weights)
        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c5 = Cluster([mh_sctj_flg, mh_tadZ, mh_flgB], model_B, self.hit_weights)
        sys_A = System(model_A, [c1, c2, c3], self.cfg.redundancy_penalty())  # score = 2.5 + 2 + 0.35 = 4.85, minus (2 * 1.5) = 1.85
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c5], self.cfg.redundancy_penalty())  # score = 2.0
        sys_B.id = "sys_id_B"
        sol = Solution([sys_A, sys_B])
        sol_id = '12'
        hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B])
        sol_serializer = TsvSolutionSerializer()
        # --- expected rows for sys_A: cluster 1, cluster 2, then the loner ---
        sol_tsv = '\t'.join([
            sol_id, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A', 'sys_id_A',
            '2', '1', '1.000', '1.850', '2', 'sctJ', 'mandatory', '803', '1.0',
            '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_sctn', 'sctN', '2', 'foo/A', 'sys_id_A',
            '2', '1', '1.000', '1.850', '2', 'sctN', 'mandatory', '803', '1.0',
            '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_gspd', 'gspD', '3', 'foo/A', 'sys_id_A',
            '2', '1', '1.000', '1.850', '2', 'gspD', 'accessory', '803', '1.0',
            '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id,
            'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A', 'sys_id_A',
            '2', '2', '1.000', '1.850', '2', 'sctJ', 'mandatory', '803', '1.0',
            '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_sctn', 'sctN', '2', 'foo/A', 'sys_id_A',
            '2', '2', '1.000', '1.850', '2', 'sctN', 'mandatory', '803', '1.0',
            '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_abc', 'abc', '20', 'foo/A', 'sys_id_A',
            '2', '-1', '1.000', '1.850', '2', 'abc', 'accessory', '803', '1.0',
            '1.000', '1.000', '1.000', '10', '20', 'hit_abc2', ''
        ])
        sol_tsv += "\n"
        # empty line closing the sys_A block
        sol_tsv += "\n"
        # --- expected rows for sys_B ---
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '10', 'foo/B',
            'sys_id_B', '1', '1', '0.750', '2.000', '1', 'sctJ_FLG', 'mandatory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_flgB', 'flgB', '11', 'foo/B', 'sys_id_B',
            '1', '1', '0.750', '2.000', '1', 'flgB', 'accessory', '803', '1.0',
            '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_tadZ', 'tadZ', '40', 'foo/B', 'sys_id_B',
            '1', '1', '0.750', '2.000', '1', 'tadZ', 'accessory', '803', '1.0',
            '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        # empty line closing the sys_B block
        sol_tsv += "\n"
        ser = sol_serializer.serialize(sol, sol_id, hit_multi_sys_tracker)
        # show the full diff when the comparison fails
        self.maxDiff = None
        self.assertEqual(ser, sol_tsv)

    def test_LikelySystemSerializer_txt(self):
        # Serialize a LikelySystem holding one mandatory, two accessory and one
        # forbidden hit as text.
        model = Model("foo/FOO", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD", self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ", self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN", self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)
        c_gene_abc = CoreGene(self.model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model)
        model.add_forbidden_gene(gene_abc)
        hit_1 = CoreHit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_hit_1 = ModelHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = CoreHit(c_gene_sctj, "hit_2", 803, "replicon_id", 2, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_hit_2 = ModelHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = CoreHit(c_gene_sctn, "hit_3", 803, "replicon_id", 3, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_hit_3 = ModelHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        hit_4 = CoreHit(c_gene_abc, "hit_4", 803, "replicon_id", 4, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_hit_4 = ModelHit(hit_4, gene_abc, GeneStatus.FORBIDDEN)
        # hit lists are passed in the order: mandatory, accessory, (empty), forbidden
        ls_1 = LikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [], [v_hit_4])
        hit_multi_sys_tracker = HitSystemTracker([ls_1])
        ser = TxtLikelySystemSerializer()
        txt = ser.serialize(ls_1, hit_multi_sys_tracker)
        expected_txt = """This replicon contains genetic materials needed for system foo/FOO WARNING there quorum is reached but there is also some forbidden genes. system id = replicon_id_FOO_1 model = foo/FOO replicon = replicon_id hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 2), ('hit_3', 'sctN', 3), ('hit_4', 'abc', 4)] wholeness = 1.000 mandatory genes: \t- gspD: 1 (gspD) accessory genes: \t- sctJ: 1 (sctJ) \t- sctN: 1 (sctN) neutral genes: forbidden genes: \t- abc: 1 (abc) Use ordered replicon to have better prediction. 
"""
        self.assertEqual(txt, expected_txt)

    def test_UnlikelySystemSerializer_txt(self):
        # Same set of hits as the LikelySystem test, wrapped in an UnlikelySystem
        # that carries one reason; this serializer is called without a hit tracker.
        model = Model("foo/FOO", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD", self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ", self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN", self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)
        c_gene_abc = CoreGene(self.model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model)
        model.add_forbidden_gene(gene_abc)
        hit_1 = CoreHit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_hit_1 = ModelHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = CoreHit(c_gene_sctj, "hit_2", 803, "replicon_id", 2, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_hit_2 = ModelHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = CoreHit(c_gene_sctn, "hit_3", 803, "replicon_id", 3, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_hit_3 = ModelHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        hit_4 = CoreHit(c_gene_abc, "hit_4", 803, "replicon_id", 4, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_hit_4 = ModelHit(hit_4, gene_abc, GeneStatus.FORBIDDEN)
        ser = TxtUnikelySystemSerializer()
        ls_1 = UnlikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [], [v_hit_4], ["the reason why"])
        txt = ser.serialize(ls_1)
        expected_txt = """This replicon probably not contains a system foo/FOO: the reason why system id = replicon_id_FOO_1 model = foo/FOO replicon = replicon_id hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 2), ('hit_3', 'sctN', 3), ('hit_4', 'abc', 4)] wholeness = 1.000 mandatory genes: \t- gspD: 1 (gspD) accessory genes: \t- sctJ: 1 (sctJ) \t- sctN: 1 (sctN) neutral genes: forbidden genes: \t- abc: 1 (abc) Use ordered replicon to have better prediction. 
"""
        self.assertEqual(txt, expected_txt)

    def test_SpecialHitSerializer_tsv(self):
        # Serialize two Loner hits of gspD (each one being the counterpart of the
        # other) as tsv: a header line followed by one row per loner.
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)
        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))
        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)
        model = Model("foo/T2SS", 10)
        gene_name = "gspD"
        cg_gspd = CoreGene(models_location, gene_name, profile_factory)
        mg_gspd = ModelGene(cg_gspd, model, loner=True)
        gene_name = "sctJ"
        cg_sctj = CoreGene(models_location, gene_name, profile_factory)
        mg_sctj = ModelGene(cg_sctj, model)
        gene_name = "abc"
        cg_abc = CoreGene(models_location, gene_name, profile_factory)
        mg_abc = ModelGene(cg_abc, model)
        model.add_mandatory_gene(mg_gspd)
        model.add_accessory_gene(mg_sctj)
        model.add_accessory_gene(mg_abc)
        chit_abc = CoreHit(cg_abc, "hit_abc", 803, "replicon_id", 3, 1.0, 1.0, 1.0, 1.0, 10, 20)
        chit_sctj = CoreHit(cg_sctj, "hit_sctj", 803, "replicon_id", 4, 1.0, 1.0, 1.0, 1.0, 10, 20)
        # the two gspD hits differ by their position (20 / 30) and score (2.0 / 3.0)
        chit_gspd1 = CoreHit(cg_gspd, "hit_gspd1", 803, "replicon_id", 20, 1.0, 2.0, 1.0, 1.0, 10, 20)
        chit_gspd2 = CoreHit(cg_gspd, "hit_gspd2", 803, "replicon_id", 30, 1.0, 3.0, 1.0, 1.0, 10, 20)
        mhit_abc = ModelHit(chit_abc, mg_abc, GeneStatus.ACCESSORY)
        mhit_sctj = ModelHit(chit_sctj, mg_sctj, GeneStatus.ACCESSORY)
        mhit_gspd1 = ModelHit(chit_gspd1, mg_gspd, GeneStatus.MANDATORY)
        mhit_gspd2 = ModelHit(chit_gspd2, mg_gspd, GeneStatus.MANDATORY)
        l_gspd1 = Loner(mhit_gspd1, counterpart=[mhit_gspd2])
        l_gspd2 = Loner(mhit_gspd2, counterpart=[mhit_gspd1])
        ser = TsvSpecialHitSerializer()
        txt = ser.serialize([l_gspd1, l_gspd2])
        # header line
        expected_txt = "\t".join([
            'replicon', 'model_fqn', 'function', 'gene_name', 'hit_id',
            'hit_pos', 'hit_status', 'hit_seq_len', 'hit_i_eval', 'hit_score',
            'hit_profile_cov', 'hit_seq_cov', 'hit_begin_match', 'hit_end_match'
        ])
        expected_txt += "\n"
        expected_txt += "\t".join([
            'replicon_id', 'foo/T2SS', 'gspD', 'gspD', 'hit_gspd1', '20',
            'mandatory', '803', '1.000e+00', '2.000', '1.000', '1.000', '10', '20'
        ])
        expected_txt += "\n"
        expected_txt += "\t".join([
            'replicon_id', 'foo/T2SS', 'gspD', 'gspD', 'hit_gspd2', '30',
            'mandatory', '803', '1.000e+00', '3.000', '1.000', '1.000', '10', '20'
        ])
        expected_txt += "\n"
        # show the full diff when the comparison fails
        self.maxDiff = None
        self.assertEqual(txt, expected_txt)
def main(args=None, log_level=None) -> None:
    """
    main entry point to macsyprofile

    Parse the command line, check the previous run directory and the report
    location, parse every hmmer output file of the previous run matching the
    requested pattern, then write the header and the hits in the report file.

    :param args: the arguments passed on the command line without the program name
                 (``sys.argv[1:]`` is used when it is None)
    :type args: List of string
    :param log_level: the output verbosity
                      (derived from the parsed verbosity when it is None)
    :type log_level: a positive int or a string among 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
    :raises FileNotFoundError: if the previous run path does not exist
    :raises ValueError: if the previous run path is not a directory,
                        if the directory of the --out file does not exist,
                        if the report already exists and --force is not set,
                        or if the model family directory cannot be found
    """
    global _log
    if args is None:
        args = sys.argv[1:]
    parsed_args = parse_args(args)

    if log_level is None:
        log_level = verbosity_to_log_level(parsed_args.verbosity)
    _log = init_logger(log_level, out=(not parsed_args.mute))

    # For every fatal error below the message is logged, then
    # sys.tracebacklimit is set to 0 so that the traceback is not displayed.
    if not os.path.exists(parsed_args.previous_run):
        _log.critical(f"{parsed_args.previous_run}: No such directory.")
        sys.tracebacklimit = 0
        raise FileNotFoundError() from None
    if not os.path.isdir(parsed_args.previous_run):
        _log.critical(f"{parsed_args.previous_run} is not a directory.")
        sys.tracebacklimit = 0
        raise ValueError() from None

    cfg = Config(MacsyDefaults(i_evalue_sel=1.0e9, coverage_profile=-1.0), parsed_args)

    msf_run_path = cfg.previous_run()
    hmmer_dir_path = os.path.join(msf_run_path, cfg.hmmer_dir())
    hmm_suffix = cfg.res_search_suffix()
    profile_suffix = cfg.profile_suffix()

    # Where to write the report: --out if provided, otherwise in the previous run directory
    if not parsed_args.out:
        profile_report_path = os.path.join(cfg.previous_run(), 'hmm_coverage.tsv')
    else:
        profile_report_path = os.path.normpath(parsed_args.out)
        dirname = os.path.normpath(os.path.dirname(parsed_args.out))
        if not os.path.exists(dirname):
            _log.critical(f"The {dirname} directory is not writable")
            sys.tracebacklimit = 0
            raise ValueError() from None

    # Never overwrite an existing report unless it is explicitly requested
    if not parsed_args.force and os.path.exists(profile_report_path):
        _log.critical(f"The file {profile_report_path} already exists. "
                      f"Remove it or specify a new output name --out or use --force option")
        sys.tracebacklimit = 0
        raise ValueError() from None

    search_pattern = os.path.join(hmmer_dir_path, f"{parsed_args.pattern}{hmm_suffix}")
    hmmer_files = sorted(glob.glob(search_pattern))

    try:
        family_name = cfg.models()[0]
        candidates = [os.path.join(root, family_name) for root in cfg.models_dir()]
        existing = [path for path in candidates if os.path.exists(path)]
        # the last existing location wins; IndexError if there is none
        model_dir = existing[-1]
        profiles_dir = os.path.join(model_dir, 'profiles')
    except IndexError:
        _log.critical(f"Cannot find models in conf file {msf_run_path}. "
                      f"May be these results have been generated with an old version of macsyfinder.")
        sys.tracebacklimit = 0
        raise ValueError() from None

    _log.debug(f"hmmer_files: {hmmer_files}")
    all_hits = []
    with open(profile_report_path, 'w') as report:
        print(header(args), file=report)

        # Collect the hits of every hmmer output file
        for hmmer_out_path in hmmer_files:
            _log.info(f"parsing {hmmer_out_path}")
            gene_name = get_gene_name(hmmer_out_path, hmm_suffix)
            profile_path = os.path.join(profiles_dir, f"{gene_name}{profile_suffix}")
            profile_len = get_profile_len(profile_path)
            hmm_parser = HmmProfile(gene_name, profile_len, hmmer_out_path, cfg)
            all_hits += hmm_parser.parse()

        if not all_hits:
            _log.info("No hit found")
        else:
            if parsed_args.best_hits:
                # Group the hits by replicon, keep only the best hits of each
                # replicon and order them by position.
                # NOTE: the original author flags this per-replicon sorting as
                # important for the order of the hits sharing the same
                # replicon_name and position in the final list: keep it.
                per_replicon = {}
                for hit in all_hits:
                    per_replicon.setdefault(hit.replicon_name, []).append(hit)
                all_hits = []
                for replicon_hits in per_replicon.values():
                    best = get_best_hits(replicon_hits, key=parsed_args.best_hits)
                    all_hits += sorted(best, key=lambda h: h.position)

            all_hits = sorted(
                all_hits,
                key=lambda h: (h.gene_name, h.replicon_name, h.position, h.score),
            )
            _log.info(f"found {len(all_hits)} hits")
            for hit in all_hits:
                print(hit, file=report)
            _log.info(f"result is in '{profile_report_path}'")