def test_dispatching_error(self):
    """ Check resolver behaviour when some texts match no dispatching rule

    With ``RAISE_ON_UNDISPATCHED`` switched on, building and parsing the
    resolver is expected to raise. With it switched off, parsing must finish
    without an ``UndispatchedTextError`` reaching the test.
    """
    tic = CtsTextInventoryCollection()
    latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
    latin.set_label("Classical Latin", "eng")
    dispatcher = CollectionDispatcher(tic)
    # We remove default dispatcher
    dispatcher.__methods__ = []

    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        if collection.id.startswith("urn:cts:latinLit:"):
            return True
        return False

    # RAISE_ON_UNDISPATCHED is set on the class, not on an instance, so it is
    # shared with every other test. Remember the current value and restore it
    # in ``finally`` so a failing assertion below cannot leak the flag.
    previous_flag = getattr(NautilusCTSResolver, "RAISE_ON_UNDISPATCHED", False)
    try:
        NautilusCTSResolver.RAISE_ON_UNDISPATCHED = True
        with self.assertRaises(Exception):
            resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"], dispatcher=dispatcher)
            resolver.logger.disabled = True
            resolver.parse()

        NautilusCTSResolver.RAISE_ON_UNDISPATCHED = False
        try:
            resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"], dispatcher=dispatcher)
            resolver.logger.disabled = True
            resolver.REMOVE_EMPTY = False
            resolver.parse()
        except UndispatchedTextError:
            self.fail("UndispatchedTextError should not have been raised")
    finally:
        NautilusCTSResolver.RAISE_ON_UNDISPATCHED = previous_flag
def make_dispatcher():
    """ Create a dispatcher holding a single ``urn:perseus:latinLit`` inventory

    The inventory is labelled in English and French, and one rule is
    registered for it: collections whose id starts with
    ``urn:cts:latinLit:`` are accepted.

    :return: The configured CollectionDispatcher
    """
    root = CtsTextInventoryCollection()

    latin_inventory = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=root)
    latin_inventory.set_label("Classical Latin", "eng")
    latin_inventory.set_label("Latin Classique", "fre")

    dispatcher = CollectionDispatcher(root)

    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        # Accept only identifiers living in the latinLit CTS namespace
        return bool(collection.id.startswith("urn:cts:latinLit:"))

    return dispatcher
def test_post_work_dispatching_active(self):
    """ Dispatching based on the citation scheme of the readable descendants

    Collections having a citation level named "line" go to the poetry
    inventory; everything else falls back to the default prose inventory.
    """
    root = CtsTextInventoryCollection()
    poetry = CtsTextInventoryMetadata("urn:perseus:poetry", parent=root)
    prose = CtsTextInventoryMetadata("urn:perseus:prose", parent=root)
    dispatcher = CollectionDispatcher(root, default_inventory_name="urn:perseus:prose")

    @dispatcher.inventory("urn:perseus:poetry")
    def dispatchPoetry(collection, **kwargs):
        # True as soon as one citation level of one readable text is "line"
        return any(
            citation.name == "line"
            for readable in collection.readableDescendants
            for citation in readable.citation
        )

    resolver = CtsCapitainsLocalResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )

    # Export everything to CTS XML before the graph gets wiped
    master_xml = resolver.getMetadata().export(Mimetypes.XML.CTS)
    poetry_xml = resolver.getMetadata("urn:perseus:poetry").export(Mimetypes.XML.CTS)
    prose_xml = resolver.getMetadata("urn:perseus:prose").export(Mimetypes.XML.CTS)

    get_graph().remove((None, None, None))
    del poetry, prose

    # Re-read both inventories from their XML export
    poetry = XmlCtsTextInventoryMetadata.parse(poetry_xml)
    prose = XmlCtsTextInventoryMetadata.parse(prose_xml)

    self.assertEqual(
        len(poetry.textgroups), 3,
        "There should be 3 textgroups in Poetry (Martial, Ovid and Juvenal)"
    )
    self.assertIsInstance(poetry, CtsTextInventoryMetadata, "should be textinventory")
    self.assertEqual(
        len(prose.textgroups), 1,
        "There should be one textgroup in Prose (Greek texts)"
    )

    get_graph().remove((None, None, None))
    del poetry, prose

    master = XmlCtsTextInventoryMetadata.parse(master_xml)
    self.assertEqual(
        len(master.readableDescendants), 26,
        "There should be all 26 readable descendants in the master collection"
    )
def _make_prefix_filter(prefix):
    """ Build a filter accepting collections whose id starts with ``prefix``

    A factory is used instead of a lambda written inside the loop so that
    each filter keeps its own ``prefix`` (lambdas created in a loop all see
    the last value of the loop variable).
    """
    return lambda collection: str(collection.id).startswith(prefix)


def _make_citation_filter(citation_name):
    """ Build a filter delegating to ``citation_contain_filter`` for ``citation_name`` """
    return lambda collection: citation_contain_filter(collection, citation_name)


def _make_directory_filter(configuration_file, target_directory):
    """ Build a filter accepting paths that start with the configured folder

    The folder is resolved with ``relative_folder`` against the configuration
    file each time the filter is called, as the original lambda did.
    """
    return lambda collection, path=None: path.startswith(
        relative_folder(configuration_file, target_directory))


def build_resolver(configuration_file):
    """ Build the dispatcher, the resolver and the cache described by an XML configuration file

    :param configuration_file: Path to the XML configuration file
    :return: Organizer, Resolver and Cache handler
    """
    with open(configuration_file) as f:
        xml = etree.parse(f)

    directories = [
        # Compute path relative to the configuration files
        relative_folder(configuration_file, directory)
        for directory in xml.xpath("//corpora/corpus/text()")
    ]
    default_collection = None
    general_collection = CtsTextInventoryCollection()
    filters_to_register = []

    for collection in xml.xpath("//collections/collection"):
        identifier = collection.xpath("./identifier/text()")[0]
        if collection.get("default") == "true":
            default_collection = identifier

        current_collection = CtsTextInventoryMetadata(
            identifier, parent=general_collection)
        for name in collection.xpath("./name"):
            current_collection.set_label(name.text, name.get("lang"))

        # We look at dispatching filters in the collection
        for filters in collection.xpath("./filters"):
            # We register prefix filters
            prefix_filters = [
                _make_prefix_filter(prefix)
                for prefix in filters.xpath("./id-starts-with/text()")
            ]
            # We register citation filters
            citation_filters = [
                _make_citation_filter(citation_name)
                for citation_name in filters.xpath("./citation-contains/text()")
            ]
            # We register path based filters
            directory_filters = [
                _make_directory_filter(configuration_file, target_directory)
                for target_directory in filters.xpath("./folder/text()")
            ]

            filters_to_register.append((
                identifier,
                collection_dispatcher_builder(
                    collection, prefix_filters, citation_filters,
                    directory_filters)
            ))

    # Create the dispatcher
    organizer = CollectionDispatcher(
        general_collection, default_inventory_name=default_collection)
    for destination_collection, anonymous_dispatching_function in filters_to_register:
        organizer.add(anonymous_dispatching_function, destination_collection)

    # Set-up the cache folder
    # TODO: Add a system for redis ?
    cache = None
    # If several cache folders are declared, the last one wins
    for cache_folder in xml.xpath("//cache-folder/text()"):
        cache = FileSystemCache(cache_folder)
    if cache is None:
        cache = SimpleCache()

    resolver = NautilusCTSResolver(
        resource=directories, dispatcher=organizer, cache=cache)

    return organizer, resolver, cache
chapters = [] for chapter_number in range( 0, 81): # Range in Python stops before its end limit chapters.append(( # Tuple are written with an () in python str(chapter_number), # First the reference for the URI as string "Pratum Spirituale " + str(chapter_number) # Then the readable format for humans )) return chapters # Setting up the collections general_collection = CtsTextInventoryCollection() greek_texts = CtsTextInventoryMetadata("greek_texts", parent=general_collection) greek_texts.set_label("Greek Texts", "eng") organizer = CollectionDispatcher(general_collection, default_inventory_name="id:misc") @organizer.inventory("greek_texts") def organize_my_meadow(collection, path=None, **kwargs): if collection.id.startswith("urn:cts:greekLit"): return True return False flask_app = Flask("Flask Application for Nemo") resolver = NautilusCTSResolver(["corpora/meadow"], dispatcher=organizer)
def test_dispatching_output(self):
    """ Dispatch by CTS namespace, export to CTS XML and check the re-parsed inventories """
    root = CtsTextInventoryCollection()

    latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=root)
    latin.set_label("Classical Latin", "eng")

    farsi = CtsTextInventoryMetadata("urn:perseus:farsiLit", parent=root)
    farsi.set_label("Farsi", "eng")

    greek = CtsTextInventoryMetadata("urn:perseus:greekLit", parent=root)
    greek.set_label("Ancient Greek", "eng")
    greek.set_label("Grec Ancien", "fre")

    dispatcher = CollectionDispatcher(root)

    # One rule per inventory, each keyed on the CTS namespace of the id
    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return bool(collection.id.startswith("urn:cts:latinLit:"))

    @dispatcher.inventory("urn:perseus:farsiLit")
    def dispatchfFarsiLit(collection, path=None, **kwargs):
        return bool(collection.id.startswith("urn:cts:farsiLit:"))

    @dispatcher.inventory("urn:perseus:greekLit")
    def dispatchGreekLit(collection, path=None, **kwargs):
        return bool(collection.id.startswith("urn:cts:greekLit:"))

    resolver = NautilusCTSResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )
    resolver.logger.disabled = True
    resolver.REMOVE_EMPTY = False
    resolver.parse()

    # Export to CTS XML first, the graph is emptied right after
    cts_xml = Mimetypes.XML.CTS
    master_xml = resolver.getMetadata().export(cts_xml)
    latin_xml = resolver.getMetadata("urn:perseus:latinLit").export(cts_xml)
    greek_xml = resolver.getMetadata("urn:perseus:greekLit").export(cts_xml)
    farsi_xml = resolver.getMetadata("urn:perseus:farsiLit").export(cts_xml)

    get_graph().remove((None, None, None))

    latin_stuff = XmlCtsTextInventoryMetadata.parse(latin_xml)
    greek_stuff = XmlCtsTextInventoryMetadata.parse(greek_xml)
    farsi_stuff = XmlCtsTextInventoryMetadata.parse(farsi_xml)

    self.assertEqual(
        len(latin_stuff.readableDescendants), 19,
        "There should be 19 readable descendants in Latin"
    )
    self.assertIsInstance(
        latin_stuff, CtsTextInventoryMetadata,
        "should be textinventory"
    )
    self.assertEqual(
        len(greek_stuff.readableDescendants), 6,
        "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
    )
    self.assertEqual(
        len(farsi_stuff.descendants), 0,
        "There should be nothing in FarsiLit"
    )
    self.assertEqual(
        greek_stuff.get_label("fre"), None,  # Text inventory have no label in CTS
        "Label should be correct"
    )

    get_graph().remove((None, None, None))

    master = XmlCtsTextInventoryMetadata.parse(master_xml)
    self.assertEqual(
        len(master.readableDescendants), 25,
        "There should be all 25 readable descendants in the master collection"
    )
def test_dispatching_latin_greek(self):
    """ Dispatch by CTS namespace and check the inventories straight from the resolver """
    root = CtsTextInventoryCollection()

    latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=root)
    latin.set_label("Classical Latin", "eng")

    farsi = CtsTextInventoryMetadata("urn:perseus:farsiLit", parent=root)
    farsi.set_label("Farsi", "eng")

    greek = CtsTextInventoryMetadata("urn:perseus:greekLit", parent=root)
    greek.set_label("Ancient Greek", "eng")
    greek.set_label("Grec Ancien", "fre")

    dispatcher = CollectionDispatcher(root)

    # One rule per inventory, each keyed on the CTS namespace of the id
    @dispatcher.inventory("urn:perseus:latinLit")
    def dispatchLatinLit(collection, path=None, **kwargs):
        return bool(collection.id.startswith("urn:cts:latinLit:"))

    @dispatcher.inventory("urn:perseus:farsiLit")
    def dispatchfFarsiLit(collection, path=None, **kwargs):
        return bool(collection.id.startswith("urn:cts:farsiLit:"))

    @dispatcher.inventory("urn:perseus:greekLit")
    def dispatchGreekLit(collection, path=None, **kwargs):
        return bool(collection.id.startswith("urn:cts:greekLit:"))

    resolver = CtsCapitainsLocalResolver(
        ["./tests/testing_data/latinLit2"],
        dispatcher=dispatcher
    )

    latin_stuff = resolver.getMetadata("urn:perseus:latinLit")
    greek_stuff = resolver.getMetadata("urn:perseus:greekLit")
    farsi_stuff = resolver.getMetadata("urn:perseus:farsiLit")

    self.assertEqual(
        len(latin_stuff.readableDescendants), 20,
        "There should be 20 readable descendants in Latin"
    )
    self.assertIsInstance(
        latin_stuff, CtsTextInventoryMetadata,
        "should be textinventory"
    )
    self.assertEqual(
        len(greek_stuff.readableDescendants), 6,
        "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
    )
    self.assertEqual(
        len(farsi_stuff.descendants), 0,
        "There should be nothing in FarsiLit"
    )
    self.assertEqual(
        str(greek_stuff.get_label("fre")), "Grec Ancien",
        "Label should be correct"
    )

    # A Greek textgroup must not be reachable through the Latin inventory
    with self.assertRaises(KeyError):
        latin_stuff["urn:cts:greekLit:tlg0003"]
from MyCapytain.resources.prototypes.cts.inventory import CtsTextInventoryCollection, CtsTextInventoryMetadata from MyCapytain.resolvers.utils import CollectionDispatcher general_collection = CtsTextInventoryCollection() formulae = CtsTextInventoryMetadata('formulae_collection', parent=general_collection) formulae.set_label('Formulae', 'ger') formulae.set_label('Formulae', 'eng') formulae.set_label('Formulae', 'fre') chartae = CtsTextInventoryMetadata('other_collection', parent=general_collection) chartae.set_label('Andere Texte', 'ger') chartae.set_label('Other Texts', 'eng') chartae.set_label('Autres Textes', 'fre') elexicon = CtsTextInventoryMetadata('lexicon_entries', parent=general_collection) elexicon.set_label('Lexikon', 'ger') elexicon.set_label('Lexicon', 'eng') elexicon.set_label('Lexique', 'fre') organizer = CollectionDispatcher(general_collection, default_inventory_name='other_collection') @organizer.inventory("formulae_collection") def organize_formulae(collection, path=None, **kwargs): if collection.id.startswith('urn:cts:formulae:andecavensis'): return True return False @organizer.inventory("lexicon_entries")
from MyCapytain.common.reference import URN
from MyCapytain.resources.prototypes.cts.inventory import CtsTextInventoryCollection as TextInventoryCollection, CtsTextInventoryMetadata as PrototypeTextInventory
from MyCapytain.resolvers.utils import CollectionDispatcher
from capitains_nautilus.cts.resolver import NautilusCTSResolver
from flask_nemo import Nemo
from flask_nemo.chunker import level_grouper
from capitains_nautilus.flask_ext import FlaskNautilus
from alpheios_nemo_ui import AlpheiosNemoUI, scheme_grouper
from alpheios_nemo_ui.plugins.alpheios_breadcrumb import AlpheiosBreadcrumb
from authlib.flask.client import OAuth

d = "./texts"

# Root collection and the inventories texts are dispatched into
tic = TextInventoryCollection()
latin = PrototypeTextInventory("urn:perseus:latinLit", parent=tic)
latin.set_label("Classical Latin", "eng")
#ara = PrototypeTextInventory("urn:alpheios:arabicLit", parent=tic)
#ara.set_label("Classical Arabic", "eng")
gc = PrototypeTextInventory("urn:perseus:greekLit", parent=tic)
gc.set_label("Ancient Greek", "eng")

dispatcher = CollectionDispatcher(tic)


@dispatcher.inventory("urn:perseus:latinLit")
def dispatchLatinLit(collection, path=None, **kwargs):
    """ Accept collections whose id lives in the ``urn:cts:latinLit:`` namespace """
    return bool(collection.id.startswith("urn:cts:latinLit:"))
from MyCapytain.resources.prototypes.cts.inventory import CtsTextInventoryCollection, CtsTextInventoryMetadata
from MyCapytain.resolvers.utils import CollectionDispatcher

# Root collection with a single inventory for the French corpus
tic = CtsTextInventoryCollection()
positions = CtsTextInventoryMetadata("urn:cts:frenchLit", parent=tic)
positions.set_label("Positions de thèse", "fr")

dispatcher = CollectionDispatcher(tic)


@dispatcher.inventory("urn:cts:frenchLit")
def dispatchFrenchLit(collection, path=None, **kwargs):
    """ Accept collections whose id lives in the ``urn:cts:frenchLit:`` namespace """
    return bool(collection.id.startswith("urn:cts:frenchLit:"))
from MyCapytain.resolvers.utils import CollectionDispatcher from capitains_nautilus.cts.resolver import NautilusCTSResolver from capitains_nautilus.flask_ext import FlaskNautilus import logging import urllib # We import enough resources from MyCapytain to retrieve data from MyCapytain.resolvers.cts.api import HttpCtsResolver from MyCapytain.retrievers.cts5 import HttpCtsRetriever # We create a Flask app app = Flask( __name__ ) tic = TextInventoryCollection() fro = PrototypeTextInventory("urn:geste", parent=tic) #Rien à voir avec les identifiants cts, c'est un identifiant de projet fro.set_label("Corpus de chansons de geste", "fro") dispatcher = CollectionDispatcher(tic) @dispatcher.inventory("urn:geste") def dispatchGeste(collection, path=None, **kwargs): if collection.id.startswith("urn:cts:froLit"): #et cette fois, c'est bien du cts et on file le début des chemins de citation. return True return False cache = Cache() NautilusDummy = NautilusCTSResolver( [ "."
from MyCapytain.resources.prototypes.cts.inventory import CtsTextInventoryCollection, CtsTextInventoryMetadata
from MyCapytain.resolvers.utils import CollectionDispatcher
from capitains_nautilus.cts.resolver import NautilusCTSResolver

# Setting up the collections
# Three inventories (Greek, Latin, miscellaneous) hang under one root
# collection; each one gets an English and a French label.
general_collection = CtsTextInventoryCollection()
greek_texts = CtsTextInventoryMetadata("mnemosyne:grec", parent=general_collection)
greek_texts.set_label("Greek Texts", "eng")
greek_texts.set_label("Textes Grecs", "fre")
latin_texts = CtsTextInventoryMetadata("mnemosyne:latin", parent=general_collection)
latin_texts.set_label("Latin Texts", "eng")
latin_texts.set_label("Textes Latins", "fre")
misc = CtsTextInventoryMetadata("mnemosyne:misc", parent=general_collection)
misc.set_label("Miscellaneous", "eng")
misc.set_label("Textes Divers", "fre")

# "mnemosyne:misc" is passed as the default inventory name
organizer = CollectionDispatcher(general_collection, default_inventory_name="mnemosyne:misc")


@organizer.inventory("mnemosyne:grec")
def organize_my_grec(collection, path=None, **kwargs):
    # Rule registered for "mnemosyne:grec": accept ids in the greekLit namespace.
    # NOTE(review): as visible here there is no explicit ``return False``, so
    # non-matching ids return None - the function may continue past this
    # excerpt; confirm against the full file.
    if collection.id.startswith("urn:cts:greekLit:"):
        return True
from MyCapytain.resources.prototypes.cts.inventory import CtsTextInventoryCollection, CtsTextInventoryMetadata
from MyCapytain.resolvers.utils import CollectionDispatcher
from capitains_nautilus.cts.resolver import NautilusCtsResolver

# Setting up the collections: three inventories under one root collection,
# each labelled in English and French
general_collection = CtsTextInventoryCollection()

poetry = CtsTextInventoryMetadata("poetry_collection", parent=general_collection)
poetry.set_label("Poetry", "eng")
poetry.set_label("Poésie", "fre")

priapeia = CtsTextInventoryMetadata("priapeia_collection", parent=general_collection)
priapeia.set_label("Priapeia", "eng")
priapeia.set_label("Priapées", "fre")

misc = CtsTextInventoryMetadata("id:misc", parent=general_collection)
misc.set_label("Miscellaneous", "eng")
misc.set_label("Textes Divers", "fre")

# "id:misc" is passed as the default inventory name
organizer = CollectionDispatcher(general_collection, default_inventory_name="id:misc")


@organizer.inventory("priapeia_collection")
def organize_my_priapeia(collection, path=None, **kwargs):
    """ Accept collections whose id starts with ``urn:cts:latinLit:phi1103`` """
    return bool(collection.id.startswith("urn:cts:latinLit:phi1103"))
from MyCapytain.resolvers.utils import CollectionDispatcher
from capitains_nautilus.cts.resolver import NautilusCTSResolver

# Setting up the collections
general_collection = CtsTextInventoryCollection()
# poetry = CtsTextInventoryMetadata("poetry_collection", parent=general_collection)
# poetry.set_label("Poetry", "eng")
# poetry.set_label("Poésie", "fre")
# priapeia = CtsTextInventoryMetadata("priapeia_collection", parent=general_collection)
# priapeia.set_label("Priapeia", "eng")
# priapeia.set_label("Priapées", "fre")

# Single active inventory; it is also the dispatcher's default inventory, and
# no dispatching rule is registered in the visible code (the rules below are
# commented out).
hchn = CtsTextInventoryMetadata("hchn", parent=general_collection)
hchn.set_label("HCHN", "eng")
hchn.set_label("HCHN", "fre")
organizer = CollectionDispatcher(general_collection, default_inventory_name="hchn")

# @organizer.inventory("priapeia_collection")
# def organize_my_priapeia(collection, path=None, **kwargs):
#     if collection.id.startswith("urn:cts:latinLit:phi1103"):
#         return True
#     return False
# @organizer.inventory("poetry_collection")
# def organize_my_poetry(collection, path=None, **kwargs):
#     # If we are not dealing with Priapeia
#     if not collection.id.startswith("urn:cts:latinLit:phi1103"):