def reset(self):
    """
        Logotheras.reset

        Put the object back into its initial, empty state :
        o   .errors is a brand new Errors object;
        o   .data is a brand new LogotherasData object, built from .errors;
        o   every other attribute is set to None.
    """
    # the two attributes which are always available; .data is built from
    # .errors, hence the order of these two lines :
    self.errors = Errors()
    self.data = LogotherasData(self.errors)

    # the Hermaia-related attributes :
    self.hermaia_dumpfile = None    # None or a _io.TextIOWrapper object (=open())
    self.hermaia_database = None    # None or a sqlite3.Connection object
    self.hermaia = None             # None or an Hermaia object

    # everything else :
    self.dchars = None              # None or a DChars'DString object.
    self.srclanguage = None         # Language object
    self.statistics = None          # None or <Statistics> object
    self.textdbdict = None          # <TextDBDict> object
class Logotheras(object):
    """
        class Logotheras

        Attributes :
        .data               : <LogotherasData> object
        .errors             : <data/errors.py::Errors> object
        .srclanguage        : Language object
        .textdbdict         : <TextDBDict> object
        .statistics         : None or <Statistics> object
        .dchars             : None or a DChars'DString object.
        .hermaia            : None or an Hermaia object
        .hermaia_database   : None or a sqlite3.Connection object
        .hermaia_dumpfile   : None or a _io.TextIOWrapper object (=open())

        Initialized by calling .initFromTextDBDictPath()

        Don't forget to call the exit() method on exit.
    """

    # ///////////////////////////////////////////////////////////////////////////
    def __init__(self):
        """
            Logotheras.__init__

            Reset every attribute (see .reset()) and write a banner into the
            messages handled by self.errors.
        """
        self.reset()

        self.errors.info("#" * 30)
        self.errors.info("new Logotheras object")
        self.errors.info("#" * 30)

    # ///////////////////////////////////////////////////////////////////////////
    def add_the_tobeduplicated_entries(self):
        """
            Logotheras.add_the_tobeduplicated_entries()
            ________________________________________________________________

            For each to-be-duplicated entry, create the corresponding article
            and add the entry into self.data . If self.data already has an
            article with the same name, the (ERR069) error is reported and
            the entry is not added.
            ________________________________________________________________

            RETURNED VALUE : a tuple of two integers,
                (number of to-be-duplicated articles added into self.data,
                 number of the important ones among them)
        """
        nbr0 = 0  # number of tobeduplicated_articles
        nbr1 = 0  # number of important tobeduplicated_articles

        # see Logotheras.initToBeDuplicatedArticles() about the format of
        # self.data.tobeduplicated_articles.
        for (
            article_header_artiname,
            entry_title,
            entry_hlevel,
            string,
            important_str_to_be_duplicated,
        ) in self.data.tobeduplicated_articles:

            new_article = ArticleData(errors=self.errors, artiname_prefix="")
            new_article.fill_the_article_as_a_tobeduplicated_entry(
                string=string,
                article_header_artiname=article_header_artiname,
                entry_title=entry_title,
                entry_hlevel=entry_hlevel,
                important_entry=important_str_to_be_duplicated,
            )

            if string in self.data:
                err_msg = (
                    "(ERR069) Can't add the to-be-duplicated entry '{0}' : "
                    "a similar entry already exists !".format(string)
                )
                self.errors.error(err_msg)
            else:
                self.data[string] = new_article

                nbr0 += 1
                if important_str_to_be_duplicated:
                    nbr1 += 1

        return (nbr0, nbr1)

    # ///////////////////////////////////////////////////////////////////////////
    def areTheArticlesInAlphabetOrder(self):
        """
            Logotheras.areTheArticlesInAlphabetOrder

            Compare each article with the preceding one, in the order given
            by self.data.get_names_and_sortingnames(), and report an
            (ERR068) error for every pair the source language declares
            unsorted.

            The function uses the 'sortingname' value to compare the
            so-called 'alphabetical' values.

            NO RETURN VALUE : the problems are only reported through
            self.errors.
        """
        previous_artiname = None
        previous_sortingname = None

        for artiname, sortingname in self.data.get_names_and_sortingnames():

            # nothing to compare the first article with :
            if previous_artiname is not None:
                order_ok = self.srclanguage.areAlphabeticallySorted((previous_sortingname, sortingname))

                if not order_ok:
                    msg = (
                        "(ERR068) Misplaced article "
                        "'{0}' (sortingname='{1}'), not in the right "
                        "(computed) alphabetical order compared to "
                        "'{2}' (sortingname='{3}') "
                        "Current order : '{2}' ('{3}') < '{0}' ('{1}'); "
                    )
                    self.errors.error(msg.format(artiname, sortingname, previous_artiname, previous_sortingname))

            previous_sortingname = sortingname
            previous_artiname = artiname

    # ///////////////////////////////////////////////////////////////////////////
    def checkDictNamesAndFullNames(self):
        """
            Logotheras.checkDictNamesAndFullNames

            Emit the (WAR011) warning for every article whose artiname is
            equal to its fullname. Nothing is done if the option
            "warning if artiname equal to fullname" is not set.
        """
        if not logotheras.options.OPTIONS["warning if artiname equal to fullname"]:
            return

        for article_artiname in self.data:
            if article_artiname == self.data[article_artiname].headerdata.fullname:
                msg = "(WAR011) " "The article's title '{0}' is a artiname and a fullname."
                self.errors.warning(msg.format(article_artiname))

    # ///////////////////////////////////////////////////////////////////////////
    def checkLinkInfo(self, link):
        """
            Logotheras.checkLinkInfo

            Verify that <link> aims to an existing article (otherwise :
            ERR067) and, inside this article, to an existing entry
            (otherwise : ERR066).

            PARAMETER :
            o   <link> : LinkInfo object
        """
        if link.artiname not in self.data:
            msg = "(ERR067) Bad link : unknown artiname; link={0}"
            self.errors.error(msg.format(repr(link)))
        else:
            article = self.data[link.artiname]
            if not article.bodydata.isTitleAnExistingEntryTitle(link.entryname):
                msg = "(ERR066) Bad link : unknown dictentry; link={0}"
                self.errors.error(msg.format(repr(link)))

    # ///////////////////////////////////////////////////////////////////////////
    def checkLinks(self):
        """
            Logotheras.checkLinks

            Verify that every link aims to a known entry.

            For each extract, the links of the text are checked, then the
            links of the translation, of the transcription and of the
            commentary when these objects are not None.
        """
        for extractdata in self.data.getExtracts(extractdata_only=True):

            for linkinfo in extractdata.textdata.links:
                self.checkLinkInfo(linkinfo)

            # the optional parts of an extract, checked in this order :
            optional_parts = (
                extractdata.translationdata,
                extractdata.transcriptiondata,
                extractdata.commentarydata,
            )
            for part in optional_parts:
                if part is not None:
                    for linkinfo in part.links:
                        self.checkLinkInfo(linkinfo)

    # ///////////////////////////////////////////////////////////////////////////
    def checksAndStatistics(self):
        """
            Logotheras.checksAndStatistics()

            Launch some tests and initialize <self.statistics>.

            The statistics (and the tests about the unused authors/titles,
            which read self.statistics) are computed only if no error has
            been reported so far.
        """
        res_checkinformationsdata = self.data.informations.checkInformationsDataIntegrity(errors=self.errors)

        # we go on only if the preceding test is ok :
        if res_checkinformationsdata:
            self.checkLinks()
            self.checkTranslationSources()
            self.checkTranscriptionSources()
            self.checkDictNamesAndFullNames()
            self.checkSimilarExtracts()

            if self.errors.number_of_errors == 0:
                self.statistics = Statistics(logotherasdata=self.data, logotheras_errors=self.errors).init()

                self.checkUnusedAuthors()
                self.checkUnusedTitles()

    # ///////////////////////////////////////////////////////////////////////////
    def checkSimilarExtracts(self):
        """
            Logotheras.checkSimilarExtracts()

            Report the extracts sharing the same work reference but having
            two different texts (ERR064, ERR065).

            Nothing is done if the option
            "warning if same workref+different txt" is set to "never".
        """
        # the option is read once, before the loop :
        option = logotheras.options.OPTIONS["warning if same workref+different txt"]
        if option == "never":
            return

        # data t[itle] a[uthor] p[osition] :
        # title_author_position : (extractdata.textdata, article_artiname, entry_title)
        datatap = {}

        # stored_differences : ( (str)str1, (str)str2 )
        #
        # Let be three texts, A, B and C, with A=B and C being different from A and B.
        # The purpose of this function is to answer this question : What's the differences
        # between A, B, C ? There's three ways to answer the question : either "A<>C and B<>C"
        # either "A<>C", either "B<>C" . The result is mathematically the same but for the program
        # it depends on the order of the operations : if the program reads C, then A and B it will
        # display "A<>C and B<>C"; but if the program reads A, then B, then C it will
        # only display "B<>C".
        # To avoid such differences, this function keeps in memory the strings defined as
        # different. If the function knows that A<>C, it can't add that "B<>C" since
        # <stored_differences> says already that B(=A)<>C.
        stored_differences = []

        for (article_artiname, entry_title, extractdata) in self.data.getExtracts(existing_workreferencedata=True):

            title_author_position = extractdata.workreferencedata.getReprTAP()

            if title_author_position not in datatap:
                # new tap-workreference :
                # we fill <datatap> with this new tap-workreference :
                datatap[title_author_position] = (extractdata.textdata, article_artiname, entry_title)
                continue

            # this tap-workreference is already known : is it the same text ?
            (tap__textdata, tap__artiname, tap__entrytitle) = datatap[title_author_position]

            (similar_strings, substring_case) = extractdata.textdata.getSimilarity(tap__textdata)
            if similar_strings:
                continue

            known_text = tap__textdata.text
            new_text = extractdata.textdata.text

            # this difference has already been reported, in one order or
            # in the other :
            if (known_text, new_text) in stored_differences or (new_text, known_text) in stored_differences:
                continue

            stored_differences.append((known_text, new_text))

            if substring_case and option == "always":
                # one text is a substring of the other one :
                msg = (
                    "(ERR065) Two extracts share "
                    "the same workreference '{0}' "
                    "but have two different texts "
                    "(one text is a substring of the other one) "
                    "\n(1) article='{1}'; entry='{2}';"
                    "\n(2) article='{4}'; entry='{5}';"
                    "\n (texte1) '{3}'"
                    "\n (texte2) '{6}'"
                    "\n (diff) {7}"
                )
            else:
                # NOTE(review): this branch is reached when no text is a
                # substring of the other one, but also when one text IS a
                # substring of the other one and the option is not "always";
                # confirm that an error is expected in this last case.
                msg = (
                    "(ERR064) Two extracts share "
                    "the same workreference '{0}' "
                    "but have two different texts "
                    "\n(1) article='{1}'; entry='{2}';"
                    "\n(2) article='{4}'; entry='{5}';"
                    "\n (texte1) '{3}'"
                    "\n (texte2) '{6}'"
                    "\n (diff) {7}"
                )

            diff = strdiff(
                TextData.cleanBeforeCmp(new_text),
                TextData.cleanBeforeCmp(known_text),
            )
            self.errors.error(
                msg.format(
                    title_author_position,
                    article_artiname,
                    entry_title,
                    new_text,
                    tap__artiname,
                    tap__entrytitle,
                    known_text,
                    diff,
                )
            )

    # ///////////////////////////////////////////////////////////////////////////
    def checkTranscriptionSources(self):
        """
            Logotheras.checkTranscriptionSources

            Report an (ERR063) error for every transcription
            o   whose source is None, if the option
                "error if unsourced transcription" is set;
            o   whose source is not None and does not appear in
                self.data.informations.transcriptions_sources .

            Only the extracts returned by
            self.data.getExtracts(existing_workreferencedata=True) are read.
        """
        for (article_artiname, entry_title, extractdata) in self.data.getExtracts(existing_workreferencedata=True):

            if extractdata.transcriptiondata is None:
                continue

            source = extractdata.transcriptiondata.source

            if source is None:
                error = bool(logotheras.options.OPTIONS["error if unsourced transcription"])
            else:
                error = source not in self.data.informations.transcriptions_sources

            if error:
                msg = (
                    "(ERR063) Unknown transcription source '{0}', "
                    "not defined in the informations file; "
                    "this source appears in the article '{1}', "
                    "in the entry '{2}'; "
                    "transcription data = {3}"
                )
                self.errors.error(msg.format(source, article_artiname, entry_title, extractdata.transcriptiondata))

    # ///////////////////////////////////////////////////////////////////////////
    def checkTranslationSources(self):
        """
            Logotheras.checkTranslationSources

            Report an (ERR062) error for every translation
            o   whose source is None, if the option
                "error if unsourced translation" is set;
            o   whose source is not None and does not appear in
                self.data.informations.translations_sources .

            Every extract returned by self.data.getExtracts() is read.
        """
        for (article_artiname, entry_title, extractdata) in self.data.getExtracts():

            if extractdata.translationdata is None:
                continue

            source = extractdata.translationdata.source

            if source is None:
                error = bool(logotheras.options.OPTIONS["error if unsourced translation"])
            else:
                error = source not in self.data.informations.translations_sources

            if error:
                msg = (
                    "(ERR062) Unknown translation source '{0}', "
                    "not defined in the informations file; "
                    "this source appears in the article '{1}', "
                    "in the entry '{2}'; "
                    "translation data = {3}"
                )
                self.errors.error(msg.format(source, article_artiname, entry_title, extractdata.translationdata))

    # ///////////////////////////////////////////////////////////////////////////
    def checkUnusedAuthors(self):
        """
            Logotheras.checkUnusedAuthors

            Emit the (WAR010) warning for every author defined in
            self.data.informations.authors but absent from
            self.statistics.used_authors .

            self.statistics must have been initialized before the call.
        """
        for author in self.data.informations.authors:
            if author not in self.statistics.used_authors:
                msg = "(WAR010) " "Unused author : '{0}'"
                self.errors.warning(msg.format(author))

    # ///////////////////////////////////////////////////////////////////////////
    def checkUnusedTitles(self):
        """
            Logotheras.checkUnusedTitles

            Emit the (WAR009) warning for every title defined in
            self.data.informations.works_titles but absent from
            self.statistics.used_titles .

            self.statistics must have been initialized before the call.
        """
        for title in self.data.informations.works_titles:
            if title not in self.statistics.used_titles:
                msg = "(WAR009) " "Unused title : '{0}'"
                self.errors.warning(msg.format(title))

    # ///////////////////////////////////////////////////////////////////////////
    def convertLocalNamesIntoRealNames(self):
        """
            Logotheras.convertLocalNamesIntoRealNames

            Search the authors' names and the works' titles; if a name/title
            is a localized one, use the real name instead.

            The work references are modified in place.
        """
        for extractdata in self.data.getExtracts(existing_workreferencedata=True, extractdata_only=True):

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            author = extractdata.workreferencedata.author
            details = self.data.informations.getDetailsAboutAuthorsLocaName(author)
            real_author = details[0]
            if real_author is not None:
                # <author> is a localized name :
                extractdata.workreferencedata.author = real_author

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            title = extractdata.workreferencedata.title
            details = self.data.informations.getDetailsAboutTitleLocaName(title)
            real_title = details[0]
            if real_title is not None:
                # <title> is a localized name :
                extractdata.workreferencedata.title = real_title

    # ///////////////////////////////////////////////////////////////////////////
    def exit(self):
        """
            Logotheras.exit()
            ________________________________________________________________

            o   creating the Hermaia dumpfile
            o   closing the Hermaia object and the Hermaia database/dumpfile.
            ________________________________________________________________

            RETURNED VALUE :
                (int) 0 if everything's ok
                (int) -1 if a problem occurred
                      (=if self.errors.number_of_errors is > 0)
        """
        # -----------------------------------------------------------------------
        # creating the Hermaia dumpfile :
        #
        # .initFromTextDBDictPath() may open a dumpfile without creating any
        # Hermaia object (this object is created only if a database is
        # required) : hence the test on self.hermaia, without which a call
        # would be made on None.
        if self.hermaia_dumpfile is not None and self.hermaia is not None:
            self.hermaia.write_database_into_dumpfile()

        # -----------------------------------------------------------------------
        # closing the Hermaia object and the Hermaia database/dumpfile :
        if self.hermaia is not None:
            self.hermaia.exit()

        if self.hermaia_database is not None:
            self.hermaia_database.close()

        if self.hermaia_dumpfile is not None:
            self.hermaia_dumpfile.close()

        if self.errors.number_of_errors == 0:
            return 0
        return -1

    # ///////////////////////////////////////////////////////////////////////////
    def incrementVersionNumber(self):
        """
            Logotheras.incrementVersionNumber

            Add 1 to self.data.informations.version_number .
        """
        self.data.informations.version_number += 1

    # ///////////////////////////////////////////////////////////////////////////
    def initFromTextDBDictPath(self, path):
        """
            Logotheras.initFromTextDBDictPath

            Read the TextDBDict stored in <path>; if the reading is a
            success :
            o   open the database/the dumpfile required by the option
                "HERMAIA" and create the Hermaia object;
            o   check the alphabetical order (if required by the options);
            o   add the to-be-duplicated entries;
            o   sort the articles, replace the localized names by the real
                ones, sort the articles again and give an index to each
                article;
            o   launch the checks and compute the statistics.
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

            PARAMETER :
            o   <path> : (str)

            RETURNED VALUE : self, whatever the result of the reading; the
                             problems are reported through self.errors.
        """
        textdbdict = TextDBDict(logotherasdata=self.data, errors=self.errors)

        self.errors.debug("calling Logotheras.initFromPath()...")
        self.textdbdict, success = textdbdict.initFromPath(path=path, logotheras_object=self)

        if success:
            hermaia_options = logotheras.options.OPTIONS["HERMAIA"]

            # ...................................................................
            # creating the file(s) required by the Hermaia module and the object
            # self.hermaia itself.
            if "database" in hermaia_options:
                dest_database = os.path.join(
                    logotheras.options.OPTIONS["HERMAIAoutput"],
                    logotheras.options.OPTIONS["hermaia::database's filename"],
                )
                self.errors.info("creating a database to be filled " "with the Hermaia module : " + dest_database)
                self.hermaia_database = sqlite3.connect(dest_database)

            if "dumpfile" in hermaia_options:
                dest_dumpfile = os.path.join(
                    logotheras.options.OPTIONS["HERMAIAoutput"],
                    logotheras.options.OPTIONS["hermaia::dumpfile's filename"],
                )
                self.errors.info("creating a dumpfile to be filled with " "the Hermaia module : " + dest_dumpfile)
                # the file stays open until .exit() is called :
                self.hermaia_dumpfile = open(dest_dumpfile, "w")

            # the Hermaia object is created only if a database is required :
            if "database" in hermaia_options:
                self.hermaia = Hermaia(database=self.hermaia_database, dumpfile=self.hermaia_dumpfile)
                self.hermaia.enter()

            # ...................................................................
            if logotheras.options.OPTIONS["error if wrong alphabetical order"]:
                self.errors.debug("calling Logotheras.areTheArticlesInAlphabetOrder()...")
                self.areTheArticlesInAlphabetOrder()

            # ...................................................................
            self.errors.debug("calling Logotheras.initToBeDuplicatedArticles()...")
            self.initToBeDuplicatedArticles()

            # ...................................................................
            self.errors.debug("calling Logotheras.add_the_tobeduplicated_entries()...")
            (nbr_tobedup_articles, nbr_important_tobedup_articles) = self.add_the_tobeduplicated_entries()
            msg = "-> number of to-be-duplicated articles : {0} " "(important ones : {1})".format(
                nbr_tobedup_articles, nbr_important_tobedup_articles
            )
            self.errors.debug(msg)

            # ...................................................................
            # initialize <self.data.keys_in_alphabetical_order> :
            self.errors.debug("calling Logotheras.setAlphabeticalOrder()...")
            self.setAlphabeticalOrder()

            # ...................................................................
            self.errors.debug("calling Logotheras.convertLocalNamesIntoRealNames()...")
            self.convertLocalNamesIntoRealNames()

            # ...................................................................
            # initialize (again) <self.data.keys_in_alphabetical_order> :
            self.errors.debug("calling Logotheras.setAlphabeticalOrder()...")
            self.setAlphabeticalOrder()

            # ...................................................................
            self.errors.debug("calling Logotheras.setArticlesIndex()...")
            self.setArticlesIndex()

            # ...................................................................
            self.errors.debug("calling Logotheras.checksAndStatistics()...")
            self.checksAndStatistics()

        return self

    # ///////////////////////////////////////////////////////////////////////////
    def initToBeDuplicatedArticles(self):
        """
            Logotheras.initToBeDuplicatedArticles

            Initialize <self.data.tobeduplicated_articles> : the list is
            emptied, then filled with one item for each entry whose
            .entry_to_be_duplicated attribute is not None.

            format of self.data.tobeduplicated_articles :
                (   article.header.artiname,
                    entry.title,
                    entry.hlevel,
                    string,
                    (bool)important string to be duplicated
                )
        """
        self.data.tobeduplicated_articles.clear()

        for article_artiname in self.data:
            bodydata = self.data[article_artiname].bodydata

            for entry_title in bodydata:
                entry = bodydata[entry_title]

                if entry.entry_to_be_duplicated is not None:
                    # name of the future article : the string to be duplicated,
                    # followed by a marker, by the name of the article and by
                    # the title of the entry it comes from.
                    fullname = (
                        entry.entry_to_be_duplicated
                        + "~/~TOBEDUPLICATED:::"
                        + article_artiname
                        + ":::"
                        + entry.title
                    )

                    new_article = (
                        article_artiname,
                        entry.title,
                        entry.hlevel,
                        fullname,
                        entry.important_entry_to_be_dup,
                    )
                    self.data.tobeduplicated_articles.append(new_article)

                    self.errors.debug("Let's add a new to-be-duplicated entry " "named '{0}'".format(fullname))

    # ///////////////////////////////////////////////////////////////////////////
    def reset(self):
        """
            Logotheras.reset

            Create a new Errors object and a new (empty) LogotherasData
            object; set the other attributes to None.
        """
        self.errors = Errors()
        self.data = LogotherasData(self.errors)

        self.textdbdict = None  # <TextDBDict> object
        self.statistics = None  # None or <Statistics> object
        self.srclanguage = None  # Language object
        self.dchars = None  # None or a DChars'DString object.

        self.hermaia = None  # None or an Hermaia object
        self.hermaia_database = None  # None or a sqlite3.Connection object
        self.hermaia_dumpfile = None  # None or a _io.TextIOWrapper object (=open())

    # ///////////////////////////////////////////////////////////////////////////
    def setAlphabeticalOrder(self):
        """
            Logotheras.setAlphabeticalOrder

            Initialize self.data.keys_in_alphabetical_order, a tuple of the
            artinames sorted according to their sorting names.
        """

        def sorting_key(row):
            """
                function used infra to sort the artinames/sortingnames :

                Since self.data.get_names_and_sortingnames() returns
                two objects (artiname, sortingname) in a row, we use
                the last one, namely "row[1]" :
            """
            if self.srclanguage.dchars is None:
                # DChars isn't available :
                return row[1]
            else:
                # DChars is available :
                return self.srclanguage.dchars(row[1]).sortingvalue()

        # we sort the keys in self.data according to the sorting name :
        res_sort = sorted(self.data.get_names_and_sortingnames(), key=sorting_key)
        self.data.keys_in_alphabetical_order = tuple(artiname for (artiname, sortingname) in res_sort)

    # ///////////////////////////////////////////////////////////////////////////
    def setArticlesIndex(self):
        """
            Logotheras.setArticlesIndex

            Add to each article an information : the index number of the
            article, stored as "#" followed by the position (from 0) of the
            article in self.data.getArticles().
        """
        for index, articledata in enumerate(self.data.getArticles()):
            articledata.headerdata.informations["index"] = "#" + str(index)

    # ///////////////////////////////////////////////////////////////////////////
    def setDChars(self, source_language):
        """
            Logotheras.setDChars()

            Initialize .dchars : .dchars is a DChars object if the option
            "use DChars" is set, if the DChars library can be imported, if
            its version is not too old and if it knows <source_language>;
            otherwise .dchars is None.

            PARAMETER :
            o   <source_language> : name of the source language, searched in
                                    dchars.languages_name.LANGUAGES_NAME
        """
        self.dchars = None

        if not logotheras.options.OPTIONS["use DChars"]:
            return

        try:
            import dchars
            import dchars.system.numversion
            import dchars.languages_name
            import dchars.dchars

            dchars_version = dchars.system.numversion.VersionOfTheProgram().numversion
            self.errors.debug("found DChars library version " + dchars_version)

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # too old version of DChars ?
            if StrictVersion(dchars_version) < StrictVersion(MINIMAL_VERSION_OF_DCHARS):
                self.errors.warning(
                    "(WAR008) "
                    "DChars' version is too old; "
                    "this program requires at least the version " + MINIMAL_VERSION_OF_DCHARS + " ."
                )

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # DChars can't deal with the source language ?
            elif source_language not in dchars.languages_name.LANGUAGES_NAME:
                msg = "Can't use DChars for the '{0}' language. " "Known languages are : {1}".format(
                    source_language, dchars.languages_name.LANGUAGES_NAME
                )
                self.errors.info(msg)

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # ok, let's use DChars with the source language :
            else:
                # ok, let's try to use the DChars library :
                msg = "DChars will be used to deal with the '{0}' language."
                self.errors.info(msg.format(source_language))

                # transliteration method ?
                _trans = None  # None for the default transliteration
                if (
                    logotheras.options.OPTIONS["textdbdict::automatic_transliteration::to_be_used"] == "yes"
                    and logotheras.options.OPTIONS["textdbdict::automatic_transliteration::method"] != ""
                ):
                    _trans = logotheras.options.OPTIONS["textdbdict::automatic_transliteration::method"]

                # options ?
                _options = {"anonymize the unknown characters": "no"}

                try:
                    self.dchars = dchars.dchars.new_dstring(
                        language=source_language, transliteration_method=_trans, options=_options
                    )
                    # ... ok, DChars can be used.

                except Exception:
                    # "except Exception" and not a bare "except" : the
                    # KeyboardInterrupt and SystemExit exceptions must not be
                    # turned into an error message.
                    msg = (
                        "(ERR061) The DChar's library returned an error message; "
                        "can't initialize any DChars object with the following parameters : "
                        "language='{0}';"
                        "transliteration_method='{1}'; "
                        "options={2}. "
                        "Since the DChars library can't be used, many other error messages may appear ! "
                        "Check the parameters passed to DChars."
                    )
                    self.errors.error(msg.format(source_language, _trans, _options))

                    # ... problem : DChars can't be used.
                    self.dchars = None

        except ImportError:
            self.errors.info("DChars library not available.")

    # ///////////////////////////////////////////////////////////////////////////
    def updateCreationDate(self):
        """
            Logotheras.updateCreationDate

            Set self.data.informations.creation_date to the current (local,
            naive) date and time.
        """
        self.data.informations.creation_date = datetime.datetime.now()

    # ///////////////////////////////////////////////////////////////////////////
    def writeDataBaseInto(self, destfile):
        """
            Logotheras.writeDataBaseInto

            Write self.data into <destfile> through a DBDict object.

            PARAMETER :
            o   <destfile> : destination path
        """
        self.errors.debug("Logotheras.writeDataBaseInto : destfile='{0}'".format(destfile))

        dbdict = DBDict(logotherasdata=self.data, errors=self.errors)
        dbdict.writeDataBaseInto(destfile=destfile, srclanguage=self.srclanguage)

    # ///////////////////////////////////////////////////////////////////////////
    def writeRSTInto(self, sourcepath, destpath):
        """
            Logotheras.writeRSTInto

            Write self.data through an RST object; the title is read in the
            option "rst::project's title".

            PARAMETERS :
            o   <sourcepath>    : source path (e.g. '~/projects/phokaia')
            o   <destpath>      : destination path
        """
        self.errors.debug("Logotheras.writeRSTInto : sourcepath='{0}'; destpath='{1}'".format(sourcepath, destpath))

        rst = RST(logotherasdata=self.data, errors=self.errors, hermaia=self.hermaia)
        rst.writeRSTInto(
            sourcepath=sourcepath,
            destpath=destpath,
            srclanguage=self.srclanguage,
            title=logotheras.options.OPTIONS["rst::project's title"],
            maintoctree_maxdepth=1,
        )

    # ///////////////////////////////////////////////////////////////////////////
    def writeTextDBDictInto(self, destpath):
        """
            Logotheras.writeTextDBDictInto

            Write the TextDBDict object stored in self.textdbdict into
            <destpath>.

            PARAMETER :
            o   <destpath> : destination path
        """
        self.errors.debug("Logotheras.writeTextDBDictInto : destpath='{0}'".format(destpath))

        self.textdbdict.writeTextDBDictInto(destpath=destpath, srclanguage=self.srclanguage)