def setUp(self):
    """Prepare a 200-residue dummy CDS inside a dummy record for each test."""
    cds = DummyCDS(0, 200)
    cds.translation = "A" * 200
    self.cds = cds
    self.record = DummyRecord(features=[cds])
    # the function under test
    self.func = domain_identification.filter_nonterminal_docking_domains
def __init__(self, pid=.5, cds=None, ref_name="ref_cds_name"):
    """Delegate to the parent with fixed reference/score fields.

    A fresh DummyCDS is substituted when no CDS is supplied.
    """
    reference_cds = cds or DummyCDS()
    super().__init__("ref_rec", ref_name, reference_cds, pid, 1234., 105., 1e-8)
def run_ranking_as_genes(self, n_terms, c_terms, orders):
    """Rank candidate gene orders and return the result as a locus-tag string.

    Each name in the given orders is wrapped in a DummyCDS (built once from
    the first order, so identical names share one object across orders).
    """
    cds_by_name = {}
    for name in orders[0]:
        cds_by_name[name] = DummyCDS(locus_tag=name)
    cds_orders = []
    for order in orders:
        cds_orders.append([cds_by_name[name] for name in order])
    ranked = orderfinder.rank_biosynthetic_orders(n_terms, c_terms, cds_orders)
    return "".join(cds.locus_tag for cds in ranked)
def setUp(self):
    """Create four dummy genes (a, b, c, e) at consecutive coordinate pairs."""
    self.gene_mapping = {}
    # a -> (1, 2), b -> (3, 4), c -> (5, 6), e -> (7, 8)
    for index, name in enumerate(["a", "b", "c", "e"]):
        start = 2 * index + 1
        self.gene_mapping[name] = DummyCDS(start, start + 1, locus_tag=name)
    self.genes = list(self.gene_mapping.values())
def test_order_finding_size(self):
    """More than ten CDS features must be rejected before any ordering work."""
    too_many = [DummyCDS() for _ in range(11)]
    with self.assertRaisesRegex(AssertionError, "input too large"):
        orderfinder.find_possible_orders(too_many, None, None)
def setUp(self):
    """Mock record/CDS lookups, then load the shared sample data.

    The mocked CDS spans 1..101, so every sequence is 100 long, as
    parse_subject expects.
    """
    mock('core.get_cds_lengths', returns={})
    mock('Record.get_cds_by_name', returns=DummyCDS(1, 101))
    self.sample_data = self.read_sample_data()
    self.sample_data_as_lists = self.file_data_to_lists(self.sample_data)
def setUp(self):
    """Build the shared fixture: rule/signature file paths, fake HMM hits,
    CDS features spread over a 150kb record, and a parsed set of test rules.
    """
    self.config = build_config([])
    # data files shipped alongside the module under test
    self.rules_file = path.get_full_path(__file__, "..", "cluster_rules", "strict.txt")
    self.signature_file = path.get_full_path(__file__, "..", "data", "hmmdetails.txt")
    self.signature_names = {sig.name for sig in core.get_signature_profiles()}
    self.filter_file = path.get_full_path(__file__, "..", "filterhmmdetails.txt")
    # two fake HSP hits per gene; all share the same coordinates/score,
    # only the model names vary
    self.results_by_id = {
        "GENE_1": [FakeHSPHit("modelA", "GENE_1", 0, 10, 50, 0),
                   FakeHSPHit("modelB", "GENE_1", 0, 10, 50, 0)],
        "GENE_2": [FakeHSPHit("modelC", "GENE_2", 0, 10, 50, 0),
                   FakeHSPHit("modelB", "GENE_2", 0, 10, 50, 0)],
        "GENE_3": [FakeHSPHit("modelC", "GENE_3", 0, 10, 50, 0),
                   FakeHSPHit("modelF", "GENE_3", 0, 10, 50, 0)],
        # NOTE(review): "modelE" below is not in self.test_names — presumably
        # deliberate so the hit never satisfies a rule; confirm
        "GENE_4": [FakeHSPHit("modelA", "GENE_4", 0, 10, 50, 0),
                   FakeHSPHit("modelE", "GENE_4", 0, 10, 50, 0)],
        "GENE_5": [FakeHSPHit("modelA", "GENE_5", 0, 10, 50, 0),
                   FakeHSPHit("modelG", "GENE_5", 0, 10, 50, 0)]
    }
    # gene locations; GENE_5 overlaps GENE_4 (130000 < 140000)
    self.feature_by_id = {
        "GENE_1": DummyCDS(0, 30000, locus_tag="GENE_1"),
        "GENE_2": DummyCDS(30000, 50000, locus_tag="GENE_2"),
        "GENE_3": DummyCDS(70000, 90000, locus_tag="GENE_3"),
        "GENE_X": DummyCDS(95000, 100000, locus_tag="GENE_X"),  # no hits
        "GENE_4": DummyCDS(125000, 140000, locus_tag="GENE_4"),
        "GENE_5": DummyCDS(130000, 150000, locus_tag="GENE_5")
    }
    # names the rule parser treats as known signatures
    self.test_names = {"modelA", "modelB", "modelC", "modelF", "modelG",
                       "a", "b", "c", "d"}
    # a small rule set exercising plain, cds(), grouped, and minimum() conditions
    self.rules = rule_parser.Parser("\n".join([
        "RULE MetaboliteA CUTOFF 10 NEIGHBOURHOOD 5 CONDITIONS modelA",
        "RULE MetaboliteB CUTOFF 10 NEIGHBOURHOOD 5 CONDITIONS cds(modelA and modelB)",
        "RULE MetaboliteC CUTOFF 10 NEIGHBOURHOOD 5 CONDITIONS (modelA and modelB)",
        "RULE MetaboliteD CUTOFF 20 NEIGHBOURHOOD 5 CONDITIONS minimum(2,[modelC,modelB]) and modelA",
        "RULE Metabolite0 CUTOFF 1 NEIGHBOURHOOD 3 CONDITIONS modelF",
        "RULE Metabolite1 CUTOFF 1 NEIGHBOURHOOD 3 CONDITIONS modelG"
    ]), self.test_names).rules
    self.features = []
    for gene_id in self.feature_by_id:
        self.features.append(self.feature_by_id[gene_id])
    self.features.sort(key=lambda x: x.location.start)  # vital for py3 < 3.5
    # record large enough to hold all features above
    self.record = Record()
    self.record._record.seq = Seq("A" * 150000)
    for feature in self.features:
        self.record.add_cds_feature(feature)