def compare_txt_xml(self, old, new):
    """Compare text and XML based parser output.

    Checks that two parsed records agree on id, name, length, sequence,
    every annotation key present in BOTH records, and the repr() of each
    feature location. Annotation keys found in only one record are ignored.

    Arguments:
     - old - first record; judging by the inline notes this is the plain
       text (SwissProt) parse - TODO confirm against the callers.
     - new - second record; presumably the UniProt XML parse.

    Differences that are deliberately tolerated:
     - "references": titles have "- " collapsed to "-", comments are
       blanked, the old medline_id is blanked, trailing "." is stripped
       from the old journal, and the old journal is blanked when the new
       one is empty. The pair is then handed to compare_reference.
     - "date": any difference is accepted.
     - "organism": the old value may be the new value followed by a space
       and extra text.
     - lists: the same items in a different order are accepted.

    Mismatches in id, name, length, sequence, reference count, feature
    count and feature locations are reported through self.assertEqual.

    Raises TypeError if differing annotation values are of incompatible
    types, and ValueError for any other annotation difference.

    NOTE: the reference objects of both records are modified in place.
    """
    self.assertEqual(old.id, new.id)
    self.assertEqual(old.name, new.name)
    self.assertEqual(len(old), len(new))
    self.assertEqual(str(old.seq), str(new.seq))

    for key in set(old.annotations).intersection(new.annotations):
        old_value = old.annotations[key]
        new_value = new.annotations[key]
        if key == "references":
            self.assertEqual(len(old_value), len(new_value))
            for r1, r2 in zip(old_value, new_value):
                # Tweak for line breaks in plain text SwissProt
                r1.title = r1.title.replace("- ", "-")
                r2.title = r2.title.replace("- ", "-")
                r1.journal = r1.journal.rstrip(".")  # Should parser do this?
                r1.medline_id = ""  # Missing in UniProt XML? TODO - check
                # Lots of extra comments in UniProt XML
                r1.comment = ""
                r2.comment = ""
                if not r2.journal:
                    r1.journal = ""
                compare_reference(r1, r2)
        elif old_value == new_value:
            pass
        elif key == "date":
            # TODO - Why is this a list vs str?
            pass
        elif not isinstance(old_value, type(new_value)):
            raise TypeError("%s gives %s vs %s" % (key, old_value, new_value))
        elif key == "organism":
            # Equal values were already accepted above, so only the
            # "old value carries a trailing suffix" case is left to allow.
            if not old_value.startswith(new_value + " "):
                raise ValueError(
                    "%s gives %s vs %s" % (key, old_value, new_value)
                )
        elif isinstance(old_value, list) and sorted(old_value) == sorted(
            new_value
        ):
            # Same items, different order.
            pass
        else:
            raise ValueError("%s gives %s vs %s" % (key, old_value, new_value))

    self.assertEqual(
        len(old.features),
        len(new.features),
        "Features in %s, %i vs %i"
        % (old.id, len(old.features), len(new.features)),
    )
    for f1, f2 in zip(old.features, new.features):
        # Locations are compared through repr(); an earlier variant compared
        # location.nofuzzy_start and location.nofuzzy_end separately.
        self.assertEqual(
            repr(f1.location),
            repr(f2.location),
            "%s %s vs %s %s" % (f1.location, f1.type, f2.location, f2.type),
        )
def compare_txt_xml(self, old, new):
    """Compare text and XML based parser output.

    Checks that two parsed records agree on id, name, length, sequence,
    every annotation key present in BOTH records, and the repr() of each
    feature location. Annotation keys found in only one record are ignored.

    Arguments:
     - old - first record; judging by the inline notes this is the plain
       text (SwissProt) parse - TODO confirm against the callers.
     - new - second record; presumably the UniProt XML parse.

    Differences that are deliberately tolerated:
     - "references": titles have "- " collapsed to "-", comments are
       blanked, the old medline_id is blanked, trailing "." is stripped
       from the old journal, and the old journal is blanked when the new
       one is empty. The pair is then handed to compare_reference.
     - "date": any difference is accepted.
     - "organism": the old value may be the new value followed by a space
       and extra text.
     - lists: the same items in a different order are accepted.

    Mismatches in id, name, length, sequence, reference count, feature
    count and feature locations are reported through self.assertEqual.

    Raises TypeError if differing annotation values are of incompatible
    types, and ValueError for any other annotation difference.

    NOTE: the reference objects of both records are modified in place.
    """
    self.assertEqual(old.id, new.id)
    self.assertEqual(old.name, new.name)
    self.assertEqual(len(old), len(new))
    self.assertEqual(str(old.seq), str(new.seq))

    for key in set(old.annotations).intersection(new.annotations):
        old_value = old.annotations[key]
        new_value = new.annotations[key]
        if key == "references":
            self.assertEqual(len(old_value), len(new_value))
            for r1, r2 in zip(old_value, new_value):
                # Tweak for line breaks in plain text SwissProt
                r1.title = r1.title.replace("- ", "-")
                r2.title = r2.title.replace("- ", "-")
                r1.journal = r1.journal.rstrip(".")  # Should parser do this?
                r1.medline_id = ""  # Missing in UniProt XML? TODO - check
                # Lots of extra comments in UniProt XML
                r1.comment = ""
                r2.comment = ""
                if not r2.journal:
                    r1.journal = ""
                compare_reference(r1, r2)
        elif old_value == new_value:
            pass
        elif key == "date":
            # TODO - Why is this a list vs str?
            pass
        elif not isinstance(old_value, type(new_value)):
            raise TypeError("%s gives %s vs %s" % (key, old_value, new_value))
        elif key == "organism":
            # Equal values were already accepted above, so only the
            # "old value carries a trailing suffix" case is left to allow.
            if not old_value.startswith(new_value + " "):
                raise ValueError(
                    "%s gives %s vs %s" % (key, old_value, new_value)
                )
        elif isinstance(old_value, list) and sorted(old_value) == sorted(
            new_value
        ):
            # Same items, different order.
            pass
        else:
            raise ValueError("%s gives %s vs %s" % (key, old_value, new_value))

    self.assertEqual(
        len(old.features),
        len(new.features),
        "Features in %s, %i vs %i"
        % (old.id, len(old.features), len(new.features)),
    )
    for f1, f2 in zip(old.features, new.features):
        # Locations are compared through repr(); an earlier variant compared
        # location.nofuzzy_start and location.nofuzzy_end separately.
        self.assertEqual(
            repr(f1.location),
            repr(f2.location),
            "%s %s vs %s %s" % (f1.location, f1.type, f2.location, f2.type),
        )