def test_roundtripped_alignment2(self):
    """Annotations on a sliced Alignment survive a json roundtrip"""
    # features are attached to a member sequence only
    aln = make_aligned_seqs(
        data=[["x", "-AAAGGGGGAACCCT"], ["y", "TTTT--TTTTAGGGA"]],
        array_align=False,
    )
    exon1 = aln.get_seq("x").add_annotation(Feature, "exon", "E1", [(3, 8)])
    exon2 = aln.get_seq("x").add_annotation(Feature, "exon", "E2", [(10, 13)])
    # slicing happens at the alignment level
    sub_aln = aln[:-3]
    aligned_x = sub_aln.named_seqs["x"]
    # the results of the next two statements are not used further
    matched_e2 = aligned_x.data.get_annotations_matching("exon", "E2")[0]
    truncated = aligned_x.data[:11]
    expected_slices = ("GGGGG", "C")

    # roundtrip a single member of the sliced alignment
    new = deserialise_object(aligned_x.to_json())
    gf1, gf2 = list(new.data.get_annotations_matching("exon"))
    self.assertEqual(str(gf1.get_slice()), expected_slices[0])
    self.assertEqual(str(gf2.get_slice()), expected_slices[1])

    # roundtrip the sliced alignment itself
    got = deserialise_object(sub_aln.to_json())
    x = got.named_seqs["x"]
    for index, expect in enumerate(expected_slices):
        self.assertEqual(str(x.data.annotations[index].get_slice()), expect)
def test_roundtrip_model_result2(self):
    """split codon model_result members have the correct type after roundtrip"""
    from cogent3.app import evo as evo_app
    from cogent3.evolve.parameter_controller import AlignmentLikelihoodFunction

    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    opt_args = {"max_evaluations": 10, "limit_action": "ignore"}
    model = evo_app.model("F81", split_codons=True, opt_args=opt_args)
    result = model(aln)
    keys = (1, 2, 3)

    # straight after deserialising, the members are still dicts
    got_obj = deserialise_object(result.to_json())
    for key in keys:
        self.assertIsInstance(got_obj[key], dict)

    # accessing the lf attribute inflates the members
    _ = got_obj.lf
    for key in keys:
        self.assertIsInstance(got_obj[key], AlignmentLikelihoodFunction)

    # explicitly calling deserialised_values() has the same effect
    got_obj = deserialise_object(result.to_json())
    got_obj.deserialised_values()
    for key in keys:
        self.assertIsInstance(got_obj[key], AlignmentLikelihoodFunction)
def test_roundtrip_discrete_time_submod(self):
    """to_json of discrete time substitution models supports roundtrip"""
    # first the default model, then one with motif_length=2
    for kwargs in ({}, {"motif_length": 2}):
        sm = get_model("DT", **kwargs)
        got = deserialise_object(sm.to_json())
        self.assertEqual(got.to_rich_dict(), sm.to_rich_dict())
def test_roundtrip_submod(self):
    """to_json of a substitution model supports roundtrip"""
    for model_name in ("HKY85", "GN", "CNFGTR"):
        sm = get_model(model_name)
        got = deserialise_object(sm.to_json())
        self.assertEqual(got.to_rich_dict(), sm.to_rich_dict())
def test_roundtripped_alignment(self):
    """Annotated Alignment survives a json roundtrip"""
    # case 1: the feature lives on a member sequence only
    aln = make_aligned_seqs(
        data=[["x", "-AAAAAAAAA"], ["y", "TTTT--TTTT"]], array_align=False
    )
    _ = aln.get_seq("x").add_annotation(Feature, "exon", "fred", [(3, 8)])
    orig_exon = list(aln.get_annotations_from_seq("x", "exon"))[0]
    expect = orig_exon.get_slice()
    new = deserialise_object(aln.to_json())
    new_exon = list(new.get_annotations_from_seq("x", "exon"))[0]
    self.assertEqual(new_exon.get_slice().to_dict(), expect.to_dict())

    # case 2: the feature lives on the alignment only
    aln = make_aligned_seqs(
        data=[["x", "-AAAAAGGGG"], ["y", "TTTT--CCCC"]], array_align=False
    )
    generic = aln.add_annotation(Feature, "generic", "no name", [(1, 4), (6, 10)])
    expect = generic.get_slice().to_dict()
    new = deserialise_object(aln.to_json())
    got = list(new.get_annotations_matching("generic"))[0]
    self.assertEqual(got.get_slice().to_dict(), expect)

    # case 3: features on both the alignment and a member sequence
    aln = make_aligned_seqs(
        data=[["x", "-AAAAAGGGG"], ["y", "TTTT--CCCC"]], array_align=False
    )
    generic = aln.add_annotation(Feature, "generic", "no name", [(1, 4), (6, 10)])
    _ = aln.get_seq("x").add_annotation(Feature, "exon", "1", [(3, 8)])
    new = deserialise_object(aln.to_json())

    # the exon is recovered
    orig_exon = list(aln.get_annotations_from_seq("x", "exon"))[0]
    expect = orig_exon.get_slice().to_dict()
    new_exon = list(new.get_annotations_from_seq("x", "exon"))[0]
    self.assertEqual(new_exon.get_slice().to_dict(), expect)

    # the generic feature is recovered
    expect = generic.get_slice().to_dict()
    got = list(new.get_annotations_matching("generic"))[0]
    self.assertEqual(got.get_slice().to_dict(), expect)

    # masking by a sequence feature still works on the deserialised object
    new = new.with_masked_annotations("exon", mask_char="?")
    self.assertEqual(new[4:9].to_dict(), {"x": "?????", "y": "--CCC"})
def test_json_roundtrip(self):
    """model_collection_result is correctly rebuilt from json"""
    coll = model_collection_result(name="blah", source="blah2")
    coll.update(self._model_results)
    self.assertEqual(coll.name, "blah")
    self.assertEqual(coll.source, "blah2")

    expected_repr = repr(coll)
    got = deserialise_object(coll.to_json())
    self.assertEqual(repr(got), expected_repr)
    self.assertIsInstance(got, model_collection_result)
    self.assertEqual(got.name, coll.name)
    self.assertEqual(got.source, coll.source)

    # select_models() must work on a freshly deserialised instance
    got = deserialise_object(coll.to_json())
    selected = got.select_models()
    self.assertIsInstance(selected[0], model_result)
def test_dendro_with_support(self):
    """exercise building dendrograms that display a support measure"""
    edge_attributes = {
        "A": {"support": 1.0, "length": 0.148},
        "B": {"support": 1.0, "length": 0.098},
        "C": {"support": 1.0, "length": 0.134},
        "edge.1": {"support": 0.8, "length": 0.016},
        "D": {"support": 1.0, "length": 0.087},
        "E": {"support": 1.0, "length": 0.048},
        "edge.0": {"support": 0.6, "length": 0.131},
        "root": {"length": None},
    }
    data = {
        "newick": "(A,(B,C)edge.1,(D,E)edge.0)",
        "edge_attributes": edge_attributes,
        "type": "cogent3.core.tree.PhyloNode",
        "version": "2019.10.17a",
    }
    tree = deserialise_object(data)
    # "circular" and "radial" are currently not supported
    for style in ("square", "angular"):
        with_support = Dendrogram(
            tree, style=style, show_support=True, threshold=0.8
        )
        without_support = Dendrogram(tree, style=style, show_support=False)
        num_with = len(with_support.figure.layout.annotations)
        num_without = len(without_support.figure.layout.annotations)
        # showing support should contribute exactly two extra annotations
        self.assertEqual(num_with - num_without, 2, style)
def test_roundtrip_rc_annotated_align(self):
    """a reverse complemented, annotated alignment survives a roundtrip"""

    def slices_by_name(align):
        # maps annotation name to its slice, over annotations from any sequence
        return {a.name: a.get_slice() for a in align.get_annotations_from_any_seq()}

    # a gap in the middle of the sequence is what exposed the bug
    aln = make_aligned_seqs(
        data=[["x", "-AAAGGGGGAAC-CT"], ["y", "TTTT--TTTTAGGGA"]],
        array_align=False,
        moltype="dna",
    )
    exon1 = aln.get_seq("x").add_annotation(Feature, "exon", "E1", [(3, 8)])
    exon2 = aln.get_seq("x").add_annotation(Feature, "exon", "E2", [(10, 13)])
    raln = aln.rc()
    got = deserialise_object(raln.to_json())
    self.assertEqual(got.to_dict(), raln.to_dict())
    orig_annots = slices_by_name(raln)
    got_annots = slices_by_name(got)
    self.assertEqual(got_annots, orig_annots)
def load_from_json(filename, classes):
    """Loads objects from json files.

    Parameters
    ----------
    filename
        name of the json file
    classes
        A series of the Cogent3 types, for example: (Alignment, ArrayAlignment)

    Returns
    -------
    The result of deserialise_object() applied to the json content.

    Raises
    ------
    AssertionError
        if a member of classes is not a type
    TypeError
        if the file is a record of a NotCompleted result, if the content has
        no 'type' key, or if its type is not one of classes
    """
    assert all(
        isinstance(klass, type) for klass in classes
    ), "classes should be a series of Cogent3 types, for example: (Alignment, ArrayAlignment)"

    with open_(filename) as f:
        content = json.loads(f.read())

    try:
        _, data, completed = load_record_from_json(content)
    except (KeyError, TypeError):
        # content could not be unpacked as a record, so treat it as the
        # serialised object itself
        data = content
    else:
        # checked outside the try body so that this TypeError is not
        # swallowed by the handler above
        if not completed:
            raise TypeError("json file is a record for type NotCompleted.")

    type_ = data.get("type", None)
    if type_ is None:
        raise TypeError("json does not contain 'type' key")

    valid_types = {get_object_provenance(klass) for klass in classes}
    if type_ not in valid_types:
        raise TypeError(f"Invalid data type: {type_} is not one of {valid_types}")

    return deserialise_object(data)
def test_roundtrip_alphabet(self):
    """to_json of an alphabet supports roundtrip"""
    alphabet = moltype.get_moltype("dna").alphabet
    got = deserialise_object(alphabet.to_json())
    self.assertEqual(type(got), type(alphabet))
    self.assertEqual(list(got), list(alphabet))
def test_roundtrip_hypothesis_result(self):
    """nested members keep the correct type after a json roundtrip"""
    from cogent3.app import evo as evo_app
    from cogent3.evolve.parameter_controller import AlignmentLikelihoodFunction

    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    opt_args = {"max_evaluations": 10, "limit_action": "ignore"}
    null = evo_app.model("F81", split_codons=True, opt_args=opt_args)
    alt = evo_app.model("GTR", split_codons=True, opt_args=opt_args)
    hyp = evo_app.hypothesis(null, alt)
    result = hyp(aln)
    self.assertIsInstance(result["F81"][1], AlignmentLikelihoodFunction)

    got_obj = deserialise_object(result.to_json())
    keys = (1, 2, 3)
    model_names = ("F81", "GTR")
    # immediately after deserialising, nested values are still dicts
    for key in keys:
        for name in model_names:
            self.assertIsInstance(got_obj[name][key], dict)

    # invoking deserialised_values() converts them
    got_obj.deserialised_values()
    for key in keys:
        for name in model_names:
            self.assertIsInstance(got_obj[name][key], AlignmentLikelihoodFunction)
def test_deserialise_tabular_distancematrix(self):
    """a DistanceMatrix is correctly rebuilt from json"""
    from cogent3.evolve.fast_distance import DistanceMatrix

    data = {
        ("ABAYE2984", "Atu3667"): None,
        ("ABAYE2984", "Avin_42730"): 0.638,
        ("ABAYE2984", "BAA10469"): None,
        ("Atu3667", "ABAYE2984"): None,
        ("Atu3667", "Avin_42730"): 2.368,
        ("Atu3667", "BAA10469"): None,
        ("Avin_42730", "ABAYE2984"): 0.638,
        ("Avin_42730", "Atu3667"): 2.368,
        ("Avin_42730", "BAA10469"): 1.85,
        ("BAA10469", "ABAYE2984"): None,
        ("BAA10469", "Atu3667"): None,
        ("BAA10469", "Avin_42730"): 1.85,
    }
    dm = DistanceMatrix(data)
    got = deserialise_object(dm.to_json())
    expected = dm.to_dict()
    observed = got.to_dict()
    for (name_a, name_b), dist in expected.items():
        value = observed[name_a, name_b]
        if dist is not None:
            assert_allclose(dist, value)
            continue
        # a None in the original must come back as nan
        assert numpy.isnan(value)
def test_roundtrip_seqcoll(self):
    """to_json of a SequenceCollection supports roundtrip"""
    seqcoll = make_unaligned_seqs(data={"A": "TTGT", "B": "GGCT"}, moltype="dna")
    serialised = seqcoll.to_json()
    got = deserialise_object(serialised)
    # comparing reverse complements exercises the moltype of both objects
    self.assertEqual(got.rc().to_dict(), seqcoll.rc().to_dict())
    self.assertIsInstance(got, alignment.SequenceCollection)
def test_roundtrip_align(self):
    """to_json of an Alignment supports roundtrip"""
    align = make_aligned_seqs(
        data={"A": "TTGTA", "B": "GGCT-"}, moltype="dna", array_align=False
    )
    serialised = align.to_json()
    got = deserialise_object(serialised)
    # comparing reverse complements exercises the moltype of both objects
    self.assertEqual(got.rc().to_dict(), align.rc().to_dict())
    self.assertIsInstance(got, alignment.Alignment)
def test_roundtrip_model_result(self):
    """model_result.to_json supports roundtrip with lazy evaluation"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    tree = make_tree(tip_names=aln.names)
    lf = get_model("HKY85").make_likelihood_function(tree)
    lf.set_alignment(aln)
    # give each edge its own starting kappa
    for edge, val in zip(aln.names, (2, 3, 4)):
        lf.set_param_rule("kappa", edge=edge, init=val)

    result = model_result(name="test")
    result[1] = lf
    self.assertIs(result[1], lf)
    self.assertEqual(result.nfp, lf.nfp)
    self.assertEqual(result.lnL, lf.lnL)

    got_obj = deserialise_object(result.to_json())
    # because of lazy evaluation the member starts out as a dict
    self.assertIsInstance(got_obj[1], dict)
    # the summary properties agree with the original
    self.assertEqual(got_obj.lnL, result.lnL)
    self.assertEqual(got_obj.nfp, result.nfp)
    self.assertEqual(got_obj.DLC, result.DLC)
    # requesting the lf attribute yields something other than a dict
    self.assertNotIsInstance(got_obj.lf, dict)
    self.assertEqual(got_obj.lf.nfp, got_obj.nfp)
def test_roundtrip_tuple_key(self):
    """a tuple used as a key survives the json roundtrip"""
    from cogent3.app.result import generic_result

    key = (1, 2)
    result = generic_result(source="none")
    result[key] = 24
    serialised = result.to_json()
    got = deserialise_object(serialised)
    self.assertEqual(got[key], 24)
def test_nested_deserialise_annotation(self):
    """annotations nested within annotations can be deserialised"""
    new = deserialise_object(self.s.to_json())
    # first annotation on the sequence, then its first nested annotation
    outer = new.annotations[0]
    inner = outer.annotations[0]
    self.assertEqual(inner.to_rich_dict(), self.nested_feature.to_rich_dict())
def test_roundtrip_json(self):
    """a sequence feature survives a json roundtrip"""
    from cogent3.util.deserialise import deserialise_seq

    seq = DNA.make_seq("AAAAATATTATTGGGT")
    seq.add_annotation(Feature, "exon", "myname", [(0, 5)])
    new = deserialise_object(seq.to_json())
    exon = new.get_annotations_matching("exon")[0]
    self.assertEqual(str(exon.get_slice()), "AAAAA")

    # slicing the sequence truncates the start of the exon
    seq = seq[3:]
    exon = seq.get_annotations_matching("exon")[0]
    new = deserialise_object(seq.to_json())
    exon = new.get_annotations_matching("exon")[0]
    self.assertEqual(str(exon.get_slice(complete=False)), "AA")
def test_not_completed_result(self):
    """a NotCompleted object is correctly rebuilt from json"""
    from cogent3.app.composable import NotCompleted

    original = NotCompleted("ERROR", "nothing", "some error", source="here")
    expect = original.to_rich_dict()
    serialised = original.to_json()
    got = deserialise_object(serialised)
    self.assertEqual(got.to_rich_dict(), expect)
def test_roundtrip_annotated_seqcoll(self):
    """to_json of a SequenceCollection keeps annotations on its sequences"""
    seqs = make_unaligned_seqs(data={"A": "TTGTA", "B": "GGCT"}, moltype="dna")
    gene = seqs.named_seqs["A"].add_feature("gene", "n1", [(2, 5)])
    serialised = seqs.to_json()
    expect = str(gene.get_slice())
    got = deserialise_object(serialised)
    recovered = got.named_seqs["A"].annotations[0]
    self.assertEqual(str(recovered.get_slice()), expect)
def test_roundtrip_seq(self):
    """to_json of a sequence supports roundtrip"""
    for moltype_name in ("dna", "protein"):
        mtype = moltype.get_moltype(moltype_name)
        seq = mtype.make_seq("ACGGTCGG", "label", info={"something": 3})
        serialised = seq.to_json()
        got = deserialise_object(serialised)
        # info, name, moltype and sequence content must all be retained
        self.assertEqual(got.info.something, 3)
        self.assertEqual(got.name, "label")
        self.assertEqual(got.moltype, seq.moltype)
        self.assertEqual(str(got), str(seq))
def deserialised_values(self):
    """deserialises any cogent3 members

    Every value that is a dict whose "type" entry contains "cogent3" is
    replaced, in place, by the result of deserialise_object() on that dict.
    Other values are left untouched.
    """
    from cogent3.util.deserialise import deserialise_object

    # iterate over a snapshot since entries are reassigned during the loop
    for key, value in list(self.items()):
        if not isinstance(value, dict):
            continue
        type_ = value.get("type", "")
        if "cogent3" in type_:
            self[key] = deserialise_object(value)
def test_roundtrip_variable(self):
    """the Variable feature type is recovered after a roundtrip"""
    seq = DNA.make_seq("AAGGGGAAAACCCCCAAAAAAAAAATTTTTTTTTTAAA", name="plus")
    xx_y = [[[2, 6], 2.4], [[10, 15], 5.1], [[25, 35], 1.3]]
    y_valued = seq.add_annotation(Variable, "SNP", "freq", xx_y)
    new = deserialise_object(seq.to_json())
    got = list(new.get_annotations_matching("SNP"))[0]
    # the deserialised value is a list of lists, so coerce the original to match
    expect = [[list(xx), y] for xx, y in y_valued.xxy_list]
    self.assertEqual(got.xxy_list, expect)
def test_roundtrip_het_lf(self):
    """a site-het likelihood function survives a json roundtrip"""
    with open("data/site-het-param-rules.json") as infile:
        rules = json.load(infile)

    aln = load_aligned_seqs("data/primates_brca1.fasta", moltype="dna")
    tree = load_tree("data/primates_brca1.tree")
    rule_lnL = rules.pop("phylohmm-gamma-kappa")
    sm = get_model("HKY85", ordered_param="rate", distribution="gamma")
    lf = sm.make_likelihood_function(tree, bins=4, sites_independent=False)
    lf.set_alignment(aln)
    lf.apply_param_rules(rule_lnL["rules"])
    serialised = lf.to_json()
    got_lf = deserialise_object(serialised)
    # the log-likelihood of the rebuilt function must match the original
    assert_allclose(lf.lnL, got_lf.lnL)
def test_deserialise_tabular_dictarray(self):
    """a DictArray is correctly rebuilt from json"""
    from cogent3.util.dict_array import DictArrayTemplate

    rows = [
        [1, "abc", 11],
        [2, "bca", 22],
        [3, "cab", 33],
        [4, "abc", 44],
        [5, "bca", 55],
    ]
    template = DictArrayTemplate(5, ["id", "foo", "bar"])
    darr = template.wrap(rows)
    serialised = darr.to_json()
    got = deserialise_object(serialised)
    self.assertEqual(got.to_dict(), darr.to_dict())
def test_bstrap(self):
    """exercising bootstrap with simple hypothesis"""
    aln = load_aligned_seqs(join(data_dir, "brca1.fasta"), moltype="dna")
    aln = aln.take_seqs(aln.names[:3])
    aln = aln.omit_gap_pos(allowed_gap_frac=0)
    opt_args = dict(max_evaluations=20, limit_action="ignore")
    m1 = evo_app.model("F81", opt_args=opt_args)
    m2 = evo_app.model("HKY85", opt_args=opt_args)
    hyp = evo_app.hypothesis(m1, m2)
    strapper = evo_app.bootstrap(hyp, num_reps=2, parallel=False)
    result = strapper(aln)
    nd = result.null_dist
    # assertTrue(x, y) treats y as the failure message, so the value types
    # must be checked explicitly; float subclasses are accepted
    observed_types = {type(v) for v in nd}
    self.assertTrue(observed_types, "null distribution is empty")
    self.assertTrue(
        all(issubclass(type_, float) for type_ in observed_types), observed_types
    )
    json = result.to_json()
    got = deserialise_object(json)
    self.assertIsInstance(got, evo_app.bootstrap_result)
def test_deserialise_tabular_table(self):
    """a Table is correctly rebuilt from json"""
    from cogent3 import make_table

    header = ["id", "foo", "bar"]
    rows = [
        [1, "abc", 11],
        [2, "bca", 22],
        [3, "cab", 33],
        [4, "abc", 44],
        [5, "bca", 55],
    ]
    table = make_table(header=header, rows=rows)
    serialised = table.to_json()
    got = deserialise_object(serialised)
    self.assertEqual(got.to_dict(), table.to_dict())
def test_roundtrip_discrete_time_likelihood_function(self):
    """to_json of a discrete time likelihood function supports roundtrip"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    tree = make_tree(tip_names=aln.names)
    lf = get_model("BH").make_likelihood_function(tree)
    lf.set_alignment(aln)
    lf.optimise(max_evaluations=25, limit_action="ignore", show_progress=False)
    expect = lf.get_log_likelihood()
    serialised = lf.to_json()
    got_obj = deserialise_object(serialised)
    # the rebuilt function must reproduce the optimised log-likelihood
    assert_allclose(got_obj.get_log_likelihood(), expect)
def read(self, path):
    """returns object deserialised from json at path

    Parameters
    ----------
    path
        either a str, which is converted via SingleReadDataStore(path)[0],
        or an object providing a read() method that returns the json record

    Returns
    -------
    The deserialised object. Its "source" (in result.info when the object
    has an info attribute, otherwise as an attribute) defaults to the record
    identifier when not already set.
    """
    # isinstance also accepts str subclasses, unlike an exact type comparison
    if isinstance(path, str):
        path = SingleReadDataStore(path)[0]

    data = path.read()
    identifier, data, _ = load_record_from_json(data)

    result = deserialise_object(data)
    if hasattr(result, "info"):
        result.info["source"] = result.info.get("source", identifier)
    else:
        try:
            identifier = getattr(result, "source", identifier)
            setattr(result, "source", identifier)
        except AttributeError:
            # best effort only: the object does not allow setting source
            pass
    return result
def read(self, identifier):
    """returns the object deserialised from a TinyDb record"""
    if getattr(identifier, "id", None) is None:
        msg = (
            f"{identifier} not connected to a TinyDB. "
            "If a json file path, use io.load_json()"
        )
        raise TypeError(msg)

    result = deserialise_object(identifier.read())
    if not hasattr(result, "info"):
        # no info attribute, so record the source directly on the object
        try:
            result.source = getattr(result, "source", identifier)
        except AttributeError:
            pass
        return result

    # keep an existing source, otherwise fall back to the identifier
    result.info["source"] = result.info.get("source", identifier)
    return result