def test_overlaps_but_not_contains(self):
    # should get gene2 and gene3: the assertions below expect hits for the
    # "Overlap" rule only, and none for "OverlapImpossible"
    #
    # NOTE: the two literals are adjacent with no comma between them, so they
    # are concatenated into a single string (separated by the trailing space)
    # and the join receives a one-element list.
    rule_text = "\n".join([
        "RULE Overlap CUTOFF 25 NEIGHBOURHOOD 5 CONDITIONS modelB and modelF "
        "RULE OverlapImpossible CUTOFF 25 NEIGHBOURHOOD 5 CONDITIONS modelA and modelF"
    ])
    rules = rule_parser.Parser(rule_text, self.test_names).rules

    hits_by_gene, genes_by_rule = hmm_detection.apply_cluster_rules(
        self.record, self.results_by_id, rules)

    expected_hits_by_gene = {
        "GENE_2": {"Overlap": {"modelB"}},
        "GENE_3": {"Overlap": {"modelF"}},
    }
    assert hits_by_gene == expected_hits_by_gene
    assert genes_by_rule == {"Overlap": {"GENE_2", "GENE_3"}}

    # only 1 cluster should be found, since it requires both genes
    # if forming clusters by .is_contained_by(), 2 clusters will be formed
    # if finding rule hits uses .is_contained_by(), no clusters will be formed
    rules_by_name = {}
    for rule in rules:
        rules_by_name[rule.name] = rule

    clusters = hmm_detection.find_protoclusters(
        self.record, genes_by_rule, rules_by_name)
    assert len(clusters) == 1

    only_cluster = clusters[0]
    assert only_cluster.product == "Overlap"
    assert only_cluster.core_location.start == 30000
    assert only_cluster.core_location.end == 90000
def test_apply_cluster_rules(self):
    # Apply the fixture's rules, then check both returned mappings:
    # gene id -> detected types, and type -> gene ids that hit it.
    detected_types, cluster_type_hits = hmm_detection.apply_cluster_rules(
        self.record, self.results_by_id, self.rules)

    # Reduce each gene's entry to a plain set, in place, so it can be compared
    # against the expected sets (presumably each entry is keyed by rule name,
    # so this leaves just the names; verify against apply_cluster_rules).
    for gene_id, found in detected_types.items():
        detected_types[gene_id] = set(found)

    expected_types = {
        "GENE_1": {"MetaboliteA", "MetaboliteB", "MetaboliteC", "MetaboliteD"},
        "GENE_2": {"MetaboliteC", "MetaboliteD"},
        "GENE_3": {"Metabolite0"},
        "GENE_4": {"MetaboliteA"},
        "GENE_5": {"Metabolite1", "MetaboliteA"},
    }
    assert detected_types == expected_types

    expected_hits = {
        "MetaboliteA": {"GENE_1", "GENE_4", "GENE_5"},
        "MetaboliteB": {"GENE_1"},
        "MetaboliteC": {"GENE_1", "GENE_2"},
        'MetaboliteD': {'GENE_1', 'GENE_2'},
        'Metabolite0': {'GENE_3'},
        'Metabolite1': {'GENE_5'},
    }
    assert cluster_type_hits == expected_hits