class TextXMLFolderResolver(TestCase):
    """Exercise ``CtsCapitainsLocalResolver`` against the ``latinLit2`` fixtures.

    ``setUp`` builds a new resolver over ``./tests/testing_data/latinLit2``
    before each test. The tests cover passage retrieval (``getTextualNode``),
    metadata retrieval (``getMetadata``), sibling lookup (``getSiblings``)
    and reference listing (``getReffs``).
    """

    # Edition URN that most tests read from.
    _EDITION_URN = "urn:cts:latinLit:phi1294.phi002.perseus-lat2"
    # Verse the tests look for in the plain-text export.
    _FIRST_LINE = "Hic est quem legis ille, quem requiris,"
    # Same verse as the single text node expected from the XPath queries
    # (note the trailing space).
    _FIRST_LINE_NODE = "Hic est quem legis ille, quem requiris, "
    # XPath used when the exported tree is the whole text.
    _XPATH_FROM_TEXT = ".//tei:div[@n='1']/tei:div[@n='1']/tei:l[@n='1']/text()"
    # XPath used when the exported tree is the passage "1.1".
    _XPATH_FROM_PASSAGE = ".//tei:l[@n='1']/text()"

    def setUp(self):
        """Reset the shared graph and create the resolver under test."""
        # NOTE(review): presumably an rdflib-style wildcard removal that
        # empties the graph so metadata does not leak between tests --
        # confirm against get_graph().
        get_graph().remove((None, None, None))
        self.resolver = CtsCapitainsLocalResolver(["./tests/testing_data/latinLit2"])

    # -- shared assertions -------------------------------------------------

    def _assert_is_passage(self, passage):
        """Assert that *passage* is a ``PrototypeCtsPassage``."""
        self.assertIsInstance(
            passage, PrototypeCtsPassage,
            "GetPassage should always return passages objects"
        )

    def _assert_plaintext_export(self, passage):
        """Assert that the plain-text export of *passage* contains the expected verse."""
        self.assertIn(
            self._FIRST_LINE,
            passage.export(output=Mimetypes.PLAINTEXT),
            "Export CtsTextMetadata should work correctly"
        )

    def _assert_first_line_xpath(self, passage, xpath):
        """Assert that *xpath* on the etree export of *passage* yields the expected verse."""
        tree = passage.export(output=Mimetypes.PYTHON.ETREE)
        self.assertEqual(
            tree.xpath(xpath, namespaces=XPATH_NAMESPACES, magic_string=False),
            [self._FIRST_LINE_NODE],
            "Export to Etree should give an Etree or Etree like object"
        )

    def _assert_martial_metadata(self, passage):
        """Assert the English title, groupname, label and description of *passage*."""
        expected = (
            ("title", "Epigrammata"),
            ("groupname", "Martial"),
            ("label", "Epigrams"),
            ("description", "M. Valerii Martialis Epigrammaton libri / recognovit W. Heraeus"),
        )
        for term, value in expected:
            self.assertEqual(
                str(passage.metadata[RDF_NAMESPACES.CTS.term(term), "eng"]),
                value,
                "Local Inventory Files should be parsed and aggregated correctly"
            )

    def _assert_siblings_and_content(self, passage):
        """Run the checks shared by the two prev/next tests on passage "1.1".

        Checks, in order: previous/next identifiers, navigation through
        ``passage.prev`` / ``passage.next``, the first child reference, and
        the plain-text and etree exports.
        """
        self.assertEqual(
            passage.prevId, CtsReference("1.pr"),
            "Previous CapitainsCtsPassage ID should be parsed"
        )
        self.assertEqual(
            passage.nextId, CtsReference("1.2"),
            "Next CapitainsCtsPassage ID should be parsed"
        )

        children = list(passage.getReffs())

        # Ensure navigability
        self.assertIn(
            "verentia ludant; quae adeo antiquis auctoribus defuit, ut",
            passage.prev.export(output=Mimetypes.PLAINTEXT),
            "Left and Right Navigation should be available"
        )
        self.assertIn(
            "Qui tecum cupis esse meos ubicumque libellos ",
            passage.next.export(output=Mimetypes.PLAINTEXT),
            "Left and Right Navigation should be available"
        )

        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(
            str(children[0]), '1.1.1',
            "Resource should be string identifiers"
        )
        self._assert_plaintext_export(passage)
        self._assert_first_line_xpath(passage, self._XPATH_FROM_PASSAGE)

    # -- getTextualNode ----------------------------------------------------

    def test_getPassage_full(self):
        """ Test that we can get a full text """
        passage = self.resolver.getTextualNode(self._EDITION_URN)
        self._assert_is_passage(passage)

        children = passage.getReffs()

        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(
            children[0], CtsReference('1'),
            "Resource should be string identifiers"
        )
        self._assert_plaintext_export(passage)
        self._assert_first_line_xpath(passage, self._XPATH_FROM_TEXT)

    def test_getPassage_no_canonical(self):
        """ Test that we can get a subreference text passage where no canonical exists"""
        passage = self.resolver.getTextualNode("urn:cts:latinLit:phi0959.phi010.perseus-eng2", "2")
        self.assertEqual(
            passage.export(Mimetypes.PLAINTEXT), "Omne fuit Musae carmen inerme meae; ",
            "CapitainsCtsPassage should resolve if directly asked"
        )
        # Only the raised exception matters here, so the result is not bound.
        with self.assertRaises(UnknownObjectError):
            self.resolver.getTextualNode("urn:cts:latinLit:phi0959.phi010", "2")
        with self.assertRaises(InvalidURN):
            self.resolver.getTextualNode("urn:cts:latinLit:phi0959", "2")

    def test_getPassage_subreference(self):
        """ Test that we can get a subreference text passage"""
        passage = self.resolver.getTextualNode(self._EDITION_URN, "1.1")

        # We check we made a reroute to GetPassage request
        self._assert_is_passage(passage)

        children = list(passage.getReffs())

        self.assertEqual(
            str(children[0]), '1.1.1',
            "Resource should be string identifiers"
        )
        self._assert_plaintext_export(passage)

        # The same passage requested through the work-level URN must export the same text.
        canonical = self.resolver.getTextualNode("urn:cts:latinLit:phi1294.phi002", "1.1")
        self.assertEqual(
            passage.export(output=Mimetypes.PLAINTEXT),
            canonical.export(output=Mimetypes.PLAINTEXT),
            "Canonical text should work"
        )

        self._assert_first_line_xpath(passage, self._XPATH_FROM_PASSAGE)

    def test_getPassage_full_metadata(self):
        """ Test that we can get a full text with its metadata"""
        passage = self.resolver.getTextualNode(self._EDITION_URN, metadata=True)

        self._assert_is_passage(passage)
        self._assert_martial_metadata(passage)
        self.assertEqual(
            passage.citation.name, "book",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            len(passage.citation), 3,
            "Local Inventory Files should be parsed and aggregated correctly"
        )

        children = list(passage.getReffs(level=3))
        # We check the passage is able to perform further requests and is well instantiated
        self.assertEqual(
            children[0], CtsReference('1.pr.1'),
            "Resource should be string identifiers"
        )
        self._assert_plaintext_export(passage)
        self._assert_first_line_xpath(passage, self._XPATH_FROM_TEXT)

    def test_getPassage_prevnext(self):
        """ Test that a subreference passage requested with metadata exposes prev/next"""
        passage = self.resolver.getTextualNode(
            self._EDITION_URN, subreference="1.1", metadata=True
        )

        self._assert_is_passage(passage)
        self._assert_siblings_and_content(passage)

    def test_getPassage_metadata_prevnext(self):
        """ Test a subreference passage requested with metadata=True and prevnext=True"""
        passage = self.resolver.getTextualNode(
            self._EDITION_URN, subreference="1.1", metadata=True, prevnext=True
        )

        self._assert_is_passage(passage)
        self._assert_martial_metadata(passage)
        self.assertEqual(
            passage.citation.name, "poem",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            passage.citation.root.name, "book",
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self.assertEqual(
            len(passage.citation.root), 3,
            "Local Inventory Files should be parsed and aggregated correctly"
        )
        self._assert_siblings_and_content(passage)

    # -- getMetadata -------------------------------------------------------

    def test_getMetadata_full(self):
        """ Checks retrieval of Metadata information for the whole inventory """
        metadata = self.resolver.getMetadata()
        self.assertIsInstance(
            metadata, Collection,
            "Resolver should return a collection object"
        )
        self.assertIsInstance(
            metadata.members[0], Collection,
            "Members of Inventory should be TextGroups"
        )
        self.assertEqual(
            len(metadata.descendants), 44,
            "There should be as many descendants as there is edition, translation, commentary, works and textgroup + 1 "
            "for default inventory"
        )
        self.assertEqual(
            len(metadata.readableDescendants), 26,
            "There should be as many readable descendants as there is edition, translation, commentary (26 ed+tr+cm)"
        )
        self.assertEqual(
            len([x for x in metadata.readableDescendants if isinstance(x, TextMetadata)]), 26,
            "There should be 24 editions + 1 translation + 1 commentary in readableDescendants"
        )
        self.assertEqual(
            len(metadata.export(output=Mimetypes.PYTHON.ETREE).xpath(
                "//ti:edition[@urn='urn:cts:latinLit:phi1294.phi002.perseus-lat2']",
                namespaces=XPATH_NAMESPACES)), 1,
            "There should be one node in exported format corresponding to lat2"
        )
        self.assertCountEqual(
            [x["@id"] for x in metadata.export(output=Mimetypes.JSON.DTS.Std)["member"]],
            ["urn:cts:latinLit:phi1294", "urn:cts:latinLit:phi0959",
             "urn:cts:greekLit:tlg0003", "urn:cts:latinLit:phi1276"],
            "There should be 4 Members in DTS JSON"
        )

    def test_getMetadata_subset(self):
        """ Checks retrieval of Metadata information for a single objectId """
        metadata = self.resolver.getMetadata(objectId="urn:cts:latinLit:phi1294.phi002")
        self.assertIsInstance(
            metadata, Collection,
            "Resolver should return a collection object"
        )
        self.assertIsInstance(
            metadata.members[0], TextMetadata,
            "Members of CtsWorkMetadata should be Texts"
        )
        self.assertEqual(
            len(metadata.descendants), 2,
            "There should be as many descendants as there is edition, translation, commentary"
        )
        self.assertEqual(
            len(metadata.readableDescendants), 2,
            "There should be 1 edition + 1 commentary in readableDescendants"
        )
        self.assertEqual(
            len([x for x in metadata.readableDescendants if isinstance(x, TextMetadata)]), 2,
            "There should be 1 edition + 1 commentary in readableDescendants"
        )
        self.assertIsInstance(
            metadata.parent, CtsTextgroupMetadata,
            "First parent should be CtsTextgroupMetadata"
        )
        self.assertIsInstance(
            metadata.parents[0], CtsTextgroupMetadata,
            "First parent should be CtsTextgroupMetadata"
        )
        self.assertEqual(
            len(metadata.export(output=Mimetypes.PYTHON.ETREE).xpath(
                "//ti:edition[@urn='urn:cts:latinLit:phi1294.phi002.perseus-lat2']",
                namespaces=XPATH_NAMESPACES)), 1,
            "There should be one node in exported format corresponding to lat2"
        )
        self.assertCountEqual(
            [x["@id"] for x in metadata.export(output=Mimetypes.JSON.DTS.Std)["member"]],
            ["urn:cts:latinLit:phi1294.phi002.opp-eng3", "urn:cts:latinLit:phi1294.phi002.perseus-lat2"],
            "There should be two members in DTS JSON"
        )

        tr = self.resolver.getMetadata(objectId="urn:cts:greekLit:tlg0003.tlg001.opp-fre1")
        self.assertIsInstance(
            tr, CtsTranslationMetadata, "Metadata should be translation"
        )
        self.assertEqual(
            tr.lang, "fre", "Language is French"
        )
        self.assertIn(
            "Histoire de la Guerre du Péloponnése", tr.get_description("eng"),
            "Description should be the right one"
        )

        cm = self.resolver.getMetadata(objectId="urn:cts:latinLit:phi1294.phi002.opp-eng3")
        self.assertIsInstance(
            cm, CtsCommentaryMetadata, "Metadata should be commentary"
        )
        self.assertEqual(
            cm.lang, "eng", "Language is English"
        )
        self.assertIn(
            "Introduction to Martial's Epigrammata", cm.get_description("eng"),
            "Description should be the right one"
        )

    # -- getSiblings -------------------------------------------------------

    def test_getSiblings(self):
        """ Ensure getSiblings works well """
        previous, following = self.resolver.getSiblings(
            textId=self._EDITION_URN, subreference="1.1"
        )
        self.assertEqual(
            previous, CtsReference("1.pr"),
            "Previous reference should be well computed"
        )
        self.assertEqual(
            following, CtsReference("1.2"),
            "Next reference should be well computed"
        )

    def test_getSiblings_nextOnly(self):
        """ Ensure getSiblings works well when there is only the next passage"""
        previous, following = self.resolver.getSiblings(
            textId=self._EDITION_URN, subreference="1.pr"
        )
        self.assertIsNone(
            previous,
            "Previous reference should not exist"
        )
        self.assertEqual(
            following, CtsReference("1.1"),
            "Next reference should be well computed"
        )

    def test_getSiblings_prevOnly(self):
        """ Ensure getSiblings works well when there is only the previous passage"""
        previous, following = self.resolver.getSiblings(
            textId=self._EDITION_URN, subreference="14.223"
        )
        self.assertEqual(
            previous, CtsReference("14.222"),
            "Previous reference should be well computed"
        )
        self.assertIsNone(
            following,
            "Next reference should not exist"
        )

    # -- getReffs ----------------------------------------------------------

    def test_getReffs_full(self):
        """ Ensure getReffs works well """
        reffs = self.resolver.getReffs(textId=self._EDITION_URN, level=1)
        self.assertEqual(
            len(reffs), 14,
            "There should be 14 books"
        )
        self.assertEqual(
            reffs[0], CtsReference("1")
        )

        reffs = self.resolver.getReffs(textId=self._EDITION_URN, level=2)
        self.assertEqual(
            len(reffs), 1527,
            "There should be 1527 poems"
        )
        self.assertEqual(
            reffs[0], CtsReference("1.pr")
        )

        # With a subreference, level=1 is asserted to yield references one
        # level below it ("1.1" -> "1.1.1").
        reffs = self.resolver.getReffs(
            textId=self._EDITION_URN,
            subreference="1.1",
            level=1
        )
        self.assertEqual(
            len(reffs), 6,
            "There should be 6 references"
        )
        self.assertEqual(
            reffs[0], CtsReference("1.1.1")
        )
import os

from MyCapytain.common.constants import Mimetypes
from MyCapytain.resolvers.cts.local import CtsCapitainsLocalResolver

# Export every text the resolver finds under the current directory as a
# plain-text file in ./text/, one file per text.

# exist_ok=True lets the script be re-run without failing on the output
# directory created by a previous run.
os.makedirs('text', exist_ok=True)

Repository = CtsCapitainsLocalResolver(["./"])

for text in Repository.texts:
    try:
        interactive_text = Repository.getTextualNode(text.id)
        # Ask the exporter to exclude tei:note and tei:teiHeader nodes.
        plaintext = interactive_text.export(
            Mimetypes.PLAINTEXT, exclude=["tei:note", "tei:teiHeader"])
        # The file is named after the last colon-separated component of the id.
        # UTF-8 is explicit so non-ASCII text does not depend on the platform's
        # default encoding.
        with open('text/{}.txt'.format(text.id.split(':')[-1]), mode='w', encoding='utf-8') as f:
            f.write(plaintext)
    except Exception as E:
        # Best effort: report the failure and move on to the next text.
        print(E)