def getStrongsEntryHTML( self, key:str ) -> Optional[str]:
    """
    Return the HTML <li> lexicon entry for the given Strong's number.

    The key is a Strong's number (string) like 'H1979'. The first letter
        selects the lexicon: keys starting with 'H' are passed to
        self.hLexicon and keys starting with 'G' are passed to self.gLexicon.

    Returns the result of that lexicon's own getStrongsEntryHTML( key ),
        or None if the key starts with neither 'H' nor 'G'.

    e.g., for H1, returns:
        <li value="1" id="ot:1"><i title="{awb}" xml:lang="hbo">אָב</i> a primitive word;
            father, in a literal and immediate, or figurative and remote application):
            <span class="kjv_def">chief, (fore-)father(-less), X patrimony, principal</span>.
            Compare names in "Abi-".</li>
        <li value="165" id="ot:165"><i title="{e-hee'}" xml:lang="hbo">אֱהִי</i> apparently an
            orthographical variation for <a href="#ot:346"><i title="{ah-yay'}" xml:lang="hbo">אַיֵּה</i></a>;
            where: <span class="kjv_def">I will be (Hos</span>. 13:10, 14) (which is often the rendering of
            the same Hebrew form from <a href="#ot:1961"><i title="{haw-yaw}" xml:lang="hbo">הָיָה</i></a>).</li>
    """
    vPrint( 'Never', debuggingThisModule, _("BibleLexicon.getStrongsEntryHTML( {} )").format( repr(key) ) )

    # Choose the lexicon from the key's leading letter
    if key.startswith( 'H' ):
        chosenLexicon = self.hLexicon
    elif key.startswith( 'G' ):
        chosenLexicon = self.gLexicon
    else: # Not a key that we know how to handle
        return None
    return chosenLexicon.getStrongsEntryHTML( key )
def getDivisionAbbreviation(self, divisionNameOrAbbreviation):
    """
    Look up the given division name or abbreviation in the sorted
        division-names dictionary and return the value stored for it.

    The text is converted to upper case before the lookup.

    Returns None if there is no match. In that case (and only if the
        global debugFlag is set) the keys which have the same first letter
        and the same length are displayed as possible alternatives.
    """
    if BibleOrgSysGlobals.debugFlag:
        assert divisionNameOrAbbreviation

    searchText = divisionNameOrAbbreviation.upper()
    namesDict = self.__sortedDivisionNamesDict
    if searchText in namesDict:
        return namesDict[searchText]

    if not BibleOrgSysGlobals.debugFlag:
        return None

    # It failed so print what the closest alternatives were
    vPrint('Quiet', debuggingThisModule, "getDivisionAbbrev", divisionNameOrAbbreviation, searchText)
    wantedLength = len(searchText)
    possibilities = [
        candidate for candidate in namesDict.keys()
        if candidate.startswith(searchText[0]) and len(candidate) == wantedLength
    ]
    vPrint('Quiet', debuggingThisModule, "Possibility list is", possibilities)
    return None
def load(self) -> None:
    """
    Fill self.StrongsEntries (which must still be None).

    If the distributed pickle file exists, it is loaded (faster);
        otherwise the converter is used to load and validate the XML
        from self.XMLFolder (slower).
    """
    fnPrint(debuggingThisModule, "GreekLexicon.load()")
    assert self.StrongsEntries is None

    standardPickleFilepath = BibleOrgSysGlobals.BOS_DISTRIBUTED_FILES_FOLDERPATH.joinpath(
        'GreekLexicon_Strongs_Table.1.pickle')

    if not standardPickleFilepath.is_file():
        # No pickle available, so load the original XML
        from BibleOrgSys.OriginalLanguages.Converters.GreekLexiconConverter import GreekStrongsFileConverter
        converter = GreekStrongsFileConverter()  # Create the empty object
        converter.loadAndValidate(self.XMLFolder)  # Load the XML
        self.StrongsEntries = converter.importDataToPython()
        return

    import pickle
    vPrint('Info', debuggingThisModule, f"Loading pickle file {standardPickleFilepath}…")
    with open(standardPickleFilepath, 'rb') as pickleFile:
        # The pickle protocol version is detected automatically, so it isn't specified here
        self.StrongsEntries = pickle.load(pickleFile)
def fullDemo() -> None:
    """
    Full demo to check class is working.

    Runs the XMLFile class over some OpenSong Bibles and then over some
        OSIS Bibles (without a schema, with a schema file, and with a web schema).
    """
    BibleOrgSysGlobals.introduceProgram( __name__, programNameVersion, LAST_MODIFIED_DATE )

    AutoProcessesFolder = "../../"
    osisSchemaHTTP = 'http://ebible.org/osisCore.2.1.1.xsd'
    osisSchemaFile = os.path.join( AutoProcessesFolder, 'sword-tools/thml2osis/xslt/tests/osisCore.2.1.1.xsd' )
    usxSchemaFile = os.path.join( AutoProcessesFolder, 'VariousScripts/usx 1.rng' ) # Not used by the tests below

    def doTest( folder, filenameList, schema=None ):
        """
        Create an XMLFile object for each filename (in the given folder),
            run both validations on it, and then display it.
        """
        for testFilename in filenameList:
            # Demonstrate the XML file class
            xmlFileObject = XMLFile( testFilename, folder, schema=schema )
            xmlFileObject.validateByLoading()
            xmlFileObject.validateWithLint()
            vPrint( 'Quiet', debuggingThisModule, xmlFileObject )
    # end of doTest

    if 1: # Test some OpenSong Bibles
        testFolder = Path( '/mnt/SSDs/Bibles//OpenSong Bibles/' )
        single = ( "KJV.xmm", ) # Not used below
        good = (
            "KJV.xmm", "AMP.xmm", "Chinese_SU.xmm", "Contemporary English Version.xmm",
            "ESV", "Italiano", "MKJV",
            "MSG.xmm", "NASB.xmm", "NIV", "NKJV.xmm", "NLT", "telugu.xmm",
            )
        nonEnglish = (
            "BIBLIA warszawska", "Chinese Union Version Simplified.txt", "hun_karoli",
            "KNV_HU", "LBLA.xmm",
            "Nowe Przymierze", "NVI.xmm", "NVI_PT", "PRT-IBS.xmm", "RV1960", "SVL.xmm",
            "UJPROT_HU", "vdc",
            "Vietnamese Bible.xmm",
            )
        bad = ( "EPS99", )
        allOfThem = good + nonEnglish + bad
        vPrint( 'Normal', debuggingThisModule, "\n\nDemonstrating the XMLFile class with OpenSong Bibles…" )
        doTest( testFolder, allOfThem )

    if 1: # Test some OSIS Bibles
        testFolder = Path( '/mnt/SSDs/Bibles/Formats/OSIS/kjvxml from DMSmith/' )
        testNames = ( "kjv.xml", "kjvfull.xml", "kjvlite.xml", )

        vPrint( 'Normal', debuggingThisModule, "\n\nDemonstrating the XMLFile class with OSIS Bibles (no schema)…" )
        doTest( testFolder, testNames )

        vPrint( 'Normal', debuggingThisModule, "\n\nDemonstrating the XMLFile class with OSIS Bibles (file schema)…" )
        doTest( testFolder, testNames, schema=osisSchemaFile )

        # Only the first file is checked against the web schema
        vPrint( 'Normal', debuggingThisModule, "\n\nDemonstrating the XMLFile class with OSIS Bibles (web schema)…" )
        doTest( testFolder, (testNames[0],), schema=osisSchemaHTTP )
def loadSystems( self, XMLFolder=None ):
    """
    Load and pre-process the specified book order systems.

    XMLFolder is the folder which is searched for BIBLEBOOKORDER_*.xml files.
        If it's None, the 'BookOrders/' folder inside
        BibleOrgSysGlobals.BOS_DATAFILES_FOLDERPATH is used.

    The loading is only ever done once (while self._XMLSystems is empty).
        For each matching file, the parsed tree is saved in
        self._XMLSystems[systemCode]['tree'], the header element (if found)
        is moved out of the tree into ['header'], and the work version, date,
        and title are saved in ['version'], ['date'], and ['title'].
        If the strictCheckingFlag is set, each system is also validated.

    If the systems have already been loaded, nothing is reloaded, and an
        error is logged if a different folder was requested.

    Returns self.
    """
    if self._XMLSystems: # The data must have been already loaded
        if XMLFolder is not None and XMLFolder!=self.__XMLFolder:
            # Report the folder that was requested (and ignored), not the one already in use
            logging.error( _("Bible book order systems are already loaded -- your different folder of {!r} was ignored").format( XMLFolder ) )
        return self

    # Only ever do this once
    if XMLFolder is None:
        XMLFolder = BibleOrgSysGlobals.BOS_DATAFILES_FOLDERPATH.joinpath( 'BookOrders/' ) # Relative to module, not cwd
    self.__XMLFolder = XMLFolder
    vPrint( 'Info', debuggingThisModule, _("Loading book order systems from {}…").format( self.__XMLFolder ) )

    filenamePrefix = "BIBLEBOOKORDER_"
    for filename in os.listdir( self.__XMLFolder ):
        filepart, extension = os.path.splitext( filename )
        if extension.upper() != '.XML' or not filepart.upper().startswith( filenamePrefix ):
            continue # Not one of our files

        bookOrderSystemCode = filepart[len(filenamePrefix):]
        vPrint( 'Verbose', debuggingThisModule, _(" Loading{} book order system from {}…").format( bookOrderSystemCode, filename ) )
        thisSystem = self._XMLSystems[bookOrderSystemCode] = {}
        tree = thisSystem['tree'] = ElementTree().parse( os.path.join( self.__XMLFolder, filename ) )
        # Fail here if we didn't load anything at all
        #   (explicit length test because the truth value of an Element is deprecated)
        assert len(tree) > 0

        # Check and remove the header element
        if tree.tag == self.XMLTreeTag:
            header = tree[0]
            if header.tag == self.headerTag:
                thisSystem['header'] = header
                tree.remove( header )
                BibleOrgSysGlobals.checkXMLNoText( header, 'header' )
                BibleOrgSysGlobals.checkXMLNoTail( header, 'header' )
                BibleOrgSysGlobals.checkXMLNoAttributes( header, 'header' )
                if len(header)>1:
                    logging.info( _("Unexpected elements in header") )
                elif len(header)==0:
                    logging.info( _("Missing work element in header") )
                else:
                    work = header[0]
                    BibleOrgSysGlobals.checkXMLNoText( work, "work in header" )
                    BibleOrgSysGlobals.checkXMLNoTail( work, "work in header" )
                    BibleOrgSysGlobals.checkXMLNoAttributes( work, "work in header" )
                    if work.tag == "work":
                        thisSystem['version'] = work.find('version').text
                        thisSystem['date'] = work.find('date').text
                        thisSystem['title'] = work.find('title').text
                    else:
                        logging.warning( _("Missing work element in header") )
            else:
                logging.warning( _("Missing header element (looking for {!r} tag)").format( self.headerTag ) )
        else:
            logging.error( _("Expected to load {!r} but got {!r}").format( self.XMLTreeTag, tree.tag ) )

        # Each remaining direct child of the tree is counted as a book
        bookCount = len( tree )
        vPrint( 'Info', debuggingThisModule, _(" Loaded {} books for {}").format( bookCount, bookOrderSystemCode ) )
        logging.info( _(" Loaded {} books for {}").format( bookCount, bookOrderSystemCode ) )

        if BibleOrgSysGlobals.strictCheckingFlag:
            self.__validateSystem( tree, bookOrderSystemCode )
    return self
def getNumVersesList(self, BBB: str, allowAlternatives=False):
    """
    Returns a list containing an integer for each chapter indicating the number of verses.

    The length of the list is the number of chapters in the book.

    If allowAlternatives is False, the lookup is simply passed on to
        BibleVersificationSystem.getNumVersesList (so any KeyError propagates).

    If allowAlternatives is True and the lookup for BBB raises a KeyError,
        each of the possible alternative book codes for BBB is tried in turn,
        and the result for the first one that doesn't raise a KeyError is returned.
        Returns None if there are no alternatives or none of them are found.
    """
    fnPrint(debuggingThisModule, "getNumVersesList( {} )".format(BBB))
    if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag:
        assert len(BBB) == 3

    if not allowAlternatives:
        return BibleVersificationSystem.getNumVersesList(self, BBB)

    # Well, we are allowed alternatives, but try the given BBB first anyway
    try:
        return BibleVersificationSystem.getNumVersesList(self, BBB)
    except KeyError:  # BBB doesn't exist in this BOS -- try an alternative
        pass

    # The lookup can give None if there are no alternatives, so substitute an empty
    #   tuple in order to fall through to returning None (instead of a TypeError)
    alternatives = BibleOrgSysGlobals.loadedBibleBooksCodes.getPossibleAlternativeBooksCodes(BBB) or ()
    for altBBB in alternatives:
        try:
            bookVersesList = BibleVersificationSystem.getNumVersesList(self, altBBB)
        except KeyError:
            continue  # altBBB doesn't exist in this BOS either -- try the next alternative
        if bookVersesList is not None:
            vPrint('Quiet', debuggingThisModule,
                   "Changed {} to {} in {!r} versification scheme".format(
                       BBB, altBBB, BibleVersificationSystem.getVersificationSystemName(self)))
        return bookVersesList
    return None
def briefDemo() -> None:
    """
    Demonstrate reading and processing some UTF-8 ESFM files.

    Reads the test dictionary file (ignoring a few markers) and then
        displays the first few records plus the final record.
    """
    BibleOrgSysGlobals.introduceProgram( __name__, programNameVersion, LAST_MODIFIED_DATE )

    import os.path
    testFilepath = BibleOrgSysGlobals.BOS_TEST_DATA_FOLDERPATH.joinpath( 'MatigsalugDictionaryA.sfm' )
    vPrint( 'Info', debuggingThisModule, "Using {} as test file…".format( testFilepath ) )

    linesDB = ESFMFile()
    linesDB.read( testFilepath, ignoreSFMs=('mn','aMU','aMW','cu','cp') )
    vPrint( 'Quiet', debuggingThisModule, len(linesDB.lines), 'lines read from file', testFilepath )

    for recordNumber, record in enumerate( linesDB.lines ):
        vPrint( 'Quiet', debuggingThisModule, recordNumber, record )
        if recordNumber > 9: # That's enough to show
            break

    # Display the last record
    vPrint( 'Quiet', debuggingThisModule, '…\n',len(linesDB.lines)-1, linesDB.lines[-1], '\n')
def getSSFFilenames( self, searchAbove=False, auto=True ):
    """
    Return a list of full pathnames of .ssf files in self.givenFolderName.

    NOTE: USFM projects don't usually have the .ssf files in the project folder,
        but 'backed-up' projects often do.

    If searchAbove is True and no .ssf files are found in the given folder,
        the next folder up the file hierarchy is searched instead.

    Unless auto is False, when more than one .ssf file is found and exactly
        one of them has a name (without the extension) which occurs inside
        the given folder name, the list is reduced to that single filepath.
    """
    def getSSFFilenamesHelper( folder ):
        """
        Return the paths of the files in folder which have a .ssf extension
            (in any case), ignoring backup files (ending with ~).
        """
        return [
            os.path.join( folder, foundFilename )
            for foundFilename in os.listdir( folder )
            if not foundFilename.endswith( '~' ) # Ignore backup files
                and os.path.splitext( foundFilename )[1].lower() == '.ssf'
            ]
    # end of getSSFFilenamesHelper

    filelist = getSSFFilenamesHelper( self.givenFolderName )
    if searchAbove and not filelist: # try the next level up
        filelist = getSSFFilenamesHelper( os.path.join( self.givenFolderName, '../' ) )

    if auto and len(filelist) > 1: # See if we can help them by automatically choosing the right one
        folderNameString = str( self.givenFolderName )
        # Take a guess that a ssf file named after (part of) the folder might be the right one
        candidates = [
            filepath for filepath in filelist
            if os.path.split( os.path.splitext( filepath )[0] )[1] in folderNameString
            ]
        if len(candidates) == 1: # Found exactly one so reduce the list down to this one filepath
            filelist = [ candidates[0] ]

    vPrint( 'Info', debuggingThisModule, f"getSSFFilenames: returning filelist ({len(filelist)})={filelist}" )
    return filelist
def __init__( self, sourceFolder, givenName, encoding='utf-8' ) -> None:
    """
    Constructor: just sets up the VerseView Bible object.

    Saves the three parameters, joins sourceFolder and givenName to make
        self.sourceFilepath, and displays a message if that file is not
        readable. (No XML is loaded here.)
    """
    # Setup and initialise the base class first
    Bible.__init__( self )
    self.objectNameString = 'VerseView XML Bible object'
    self.objectTypeString = 'VerseView'

    # Now we can set our object variables
    self.sourceFolder = sourceFolder
    self.givenName = givenName
    self.encoding = encoding
    self.sourceFilepath = os.path.join( self.sourceFolder, self.givenName )

    # These will hold the XML data
    self.XMLTree = None
    self.header = None

    # Get the data tables that we need for proper checking
    self.genericBOS = BibleOrganisationalSystem( 'GENERIC-KJV-66-ENG' )

    # Do a preliminary check on the readability of our file
    fileIsReadable = os.access( self.sourceFilepath, os.R_OK )
    if not fileIsReadable:
        vPrint( 'Quiet', debuggingThisModule, "VerseViewXMLBible: File {!r} is unreadable".format( self.sourceFilepath ) )

    self.name = self.givenName
def run():
    """
    Search for a Bible in the file or folder given on the command line,
        load it, and (if exactly one Bible was loaded) export it as USX XML.
    """
    # Configure basic Bible Organisational System (BOS) set-up
    parser = BibleOrgSysGlobals.setup(PROGRAM_NAME, PROGRAM_VERSION)
    parser.add_argument("inputBibleFileOrFolder", help="path/to/BibleFileOrFolder")
    BibleOrgSysGlobals.addStandardOptionsAndProcess(parser)

    # Search for a Bible and attempt to load it
    givenLocation = BibleOrgSysGlobals.commandLineArguments.inputBibleFileOrFolder
    unknownBible = UnknownBible(givenLocation)
    # Load all the books if we find any
    searchResult = unknownBible.search(autoLoadAlways=True, autoLoadBooks=True)

    # See if we were successful at loading one (and only one), and if so, do the export
    if isinstance(searchResult, Bible):  # i.e., not an error message
        searchResult.toUSXXML()  # Export as USX files (USFM inside XML)
        vPrint(
            'Quiet', False,
            f"\nOutput should be in {BibleOrgSysGlobals.DEFAULT_WRITEABLE_OUTPUT_FOLDERPATH.joinpath( 'BOS_USX2_Export/' )}/ folder."
        )

    # Do the BOS close-down stuff
    BibleOrgSysGlobals.closedown(PROGRAM_NAME, PROGRAM_VERSION)
def _removeMetegOrSiluq(self, text: str, asVowel: bool) -> str:
    """
    Return the text with some of its meteg/siluq marks deleted.

    A mark which directly follows a patah or a segol is assumed to be
        a vowel point meteg; any other is assumed to be a cantillation mark siluq.
    If asVowel is True, only the assumed vowel point marks are deleted,
        otherwise only the assumed cantillation marks are deleted.

    It's actually often impossible to tell automatically
        which purpose this Unicode mark has.
    """
    while text:
        finalIndex = len(text) - 1
        for j, mark in enumerate(text):
            if mark != metegOrSiluq:
                continue
            previousMark = '' if j == 0 else text[j - 1]
            nextMark = '' if j >= finalIndex else text[j + 1]
            # NOTE: The tuple of following marks is empty, so only the previous mark can match
            seemsToBeVowelPoint = previousMark in (patah, segol) or nextMark in ()
            if seemsToBeVowelPoint and asVowel:
                vPrint(
                    'Info', debuggingThisModule,
                    f"Deleting (vowel point) meteg after {previousMark!r} ({unicodedata.name(previousMark) if previousMark else ''}) and before {nextMark!r} ({unicodedata.name(nextMark) if nextMark else ''})"
                )
            elif seemsToBeVowelPoint:
                vPrint(
                    'Info', debuggingThisModule,
                    f"Ignoring (vowel point) meteg/siluq after {previousMark!r} ({unicodedata.name(previousMark) if previousMark else ''}) and before {nextMark!r} ({unicodedata.name(nextMark) if nextMark else ''})"
                )
                continue
            elif not asVowel:
                vPrint(
                    'Info', debuggingThisModule,
                    f"Deleting (cantillation mark) siluq after {previousMark!r} ({unicodedata.name(previousMark) if previousMark else ''}) and before {nextMark!r} ({unicodedata.name(nextMark) if nextMark else ''})"
                )
            else:
                vPrint(
                    'Info', debuggingThisModule,
                    f"Ignoring (cantillation mark) meteg/siluq after {previousMark!r} ({unicodedata.name(previousMark) if previousMark else ''}) and before {nextMark!r} ({unicodedata.name(nextMark) if nextMark else ''})"
                )
                continue
            # Delete this mark and then rescan the shortened text from its beginning
            text = text[:j] + text[j + 1:]
            break
        else:  # Got right through the text without deleting anything
            break
    return text
def briefDemo() -> None:
    """
    Brief demo: load and validate the Greek Strongs XML with the
        converter class and display a summary of it.

    (If the export command line option is given, only a message is displayed.)
    """
    BibleOrgSysGlobals.introduceProgram(__name__, programNameVersion, LAST_MODIFIED_DATE)

    if 1:  # demonstrate the Greek Lexicon converter classes
        vPrint('Normal', debuggingThisModule, "\nDemonstrating the converter classes…")
        vPrint('Quiet', debuggingThisModule, '')

        converter = GreekStrongsFileConverter()
        converter.loadAndValidate()  # Load the XML
        vPrint('Quiet', debuggingThisModule, converter)  # Just print a summary

        wantExport = BibleOrgSysGlobals.commandLineArguments.export
        if wantExport:
            vPrint('Quiet', debuggingThisModule, "Exports aren't written yet!")
def __validateAndExtractBook( self, book, bookNumber ):
    """
    Check/validate and extract book data from the given XML book record
        finding chapter subelements.

    The book name is taken from the "n" attribute of the book element
        (a warning is logged for any other attribute).
    The book code (BBB) is found from the book name (retrying with any
        accents removed), but the code found from bookNumber is used instead
        if that's different.

    If a book code was determined, each chapter subelement is validated
        and extracted into a new BibleBook, which is then stashed
        (an error is logged for any subelement which isn't a chapter).
    """
    vPrint( 'Verbose', debuggingThisModule, _("Validating XML book…") )

    # Process the div attributes first
    BBB = bookName = None
    for attrib,value in book.items():
        if attrib=="n":
            bookName = value
        else:
            logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )

    if bookName:
        BBB = self.genericBOS.getBBBFromText( bookName )
        if BBB is None:
            # Only retry when there's actually a name, so removeAccents is never given None
            adjustedBookName = BibleOrgSysGlobals.removeAccents( bookName )
            if adjustedBookName != bookName:
                BBB = self.genericBOS.getBBBFromText( adjustedBookName )

    # Just double check using the book number
    BBB2 = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
    if BBB2 != BBB:
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            vPrint( 'Quiet', debuggingThisModule, "Assuming that book {} {!r} is {} (not {})".format( bookNumber, bookName, BBB2, BBB ) )
        BBB = BBB2 # The code found from the book number takes precedence

    if not BBB:
        return

    vPrint( 'Info', debuggingThisModule, _("Validating {} {}…").format( BBB, bookName ) )
    thisBook = BibleBook( self, BBB )
    thisBook.objectNameString = 'VerseView XML Bible Book object'
    thisBook.objectTypeString = 'VerseView'
    for element in book:
        if element.tag == VerseViewXMLBible.chapterTag:
            sublocation = "chapter in {}".format( BBB )
            BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'j3jd' )
            BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'al1d' )
            self.__validateAndExtractChapter( BBB, thisBook, element )
        else:
            logging.error( "vb26 Expected to find {!r} but got {!r}".format( VerseViewXMLBible.chapterTag, element.tag ) )
    vPrint( 'Info', debuggingThisModule, " Saving {} into results…".format( BBB ) )
    self.stashBook( thisBook )
def processSearchResult(searchResult: dict, downloadAllBooks: bool = False) -> None:
    """
    Make a DCSBible from the given search result (which must be a non-empty dict),
        preload it, display it, and then display the verse data
        for each of the test references.

    Anything else given as the search result is just reported
        (if the verbosity level is above zero).
    """
    if not (searchResult and isinstance(searchResult, dict)):
        if BibleOrgSysGlobals.verbosityLevel > 0:
            vPrint('Quiet', debuggingThisModule, f"Unexpected search result: {searchResult}")
        return

    dcsBible1 = DCSBible(searchResult, downloadAllBooks=downloadAllBooks)
    try:
        dcsBible1.preload()
    except FileNotFoundError:
        # A missing file is only expected if we didn't download all of the books
        assert downloadAllBooks == False
    vPrint('Normal', debuggingThisModule, dcsBible1, end='\n\n')

    for testRef in testRefs:
        verseKey = SimpleVerseKey(*testRef)
        if BibleOrgSysGlobals.verbosityLevel > 0:
            vPrint('Quiet', debuggingThisModule, verseKey)
            vPrint('Quiet', debuggingThisModule, " ", dcsBible1.getVerseDataList(verseKey))
def testUB(TUBfilename):
    """
    Crudely demonstrate the Unbound Bible class.

    Loads the named module from the test folder, displays a summary,
        optionally checks and exports it (depending on the global flags),
        and then displays the text of some sample verses.
    """
    from BibleOrgSys.Reference import VerseReferences

    # Must be the same as below
    testFolder = BibleOrgSysGlobals.BOS_LIBRARY_BASE_FOLDERPATH.joinpath(
        '../../../../../../mnt/SSDs/Bibles/Biola Unbound modules/')
    TUBfolder = os.path.join(testFolder, TUBfilename + '/')
    vPrint('Normal', debuggingThisModule, _("Demonstrating the Unbound Bible class…"))
    vPrint('Quiet', debuggingThisModule, " Test folder is {!r} {!r}".format(TUBfolder, TUBfilename))

    ub = UnboundBible(TUBfolder, TUBfilename)
    ub.load()  # Load and process the file
    vPrint('Normal', debuggingThisModule, ub)  # Just print a summary

    if BibleOrgSysGlobals.strictCheckingFlag:
        ub.check()
        uBErrors = ub.getCheckResults()  # Fetched but not displayed
    if BibleOrgSysGlobals.commandLineArguments.export:
        ub.doAllExports(wantPhotoBible=False, wantODFs=False, wantPDFs=False)

    sampleReferences = (
        ('OT','GEN','1','1'), ('OT','GEN','1','3'), ('OT','PSA','3','0'), ('OT','PSA','3','1'),
        ('OT','DAN','1','21'), ('NT','MAT','3','5'), ('NT','JDE','1','4'), ('NT','REV','22','21'),
        ('DC','BAR','1','1'), ('DC','MA1','1','1'), ('DC','MA2','1','1',),
    )
    for reference in sampleReferences:
        testament, bookCode, chapter, verse = reference
        if (testament == 'OT' and len(ub) == 27  # Don't bother with OT references if it's only a NT
                or testament == 'NT' and len(ub) == 39  # Don't bother with NT references if it's only a OT
                or testament == 'DC' and len(ub) <= 66):  # Don't bother with DC references if it's too small
            continue
        svk = VerseReferences.SimpleVerseKey(bookCode, chapter, verse)
        shortText = svk.getShortText()
        try:
            verseText = ub.getVerseText(svk)
        except KeyError:
            verseText = "Verse not available!"
        vPrint('Normal', debuggingThisModule, reference, shortText, verseText)
def fullDemo() -> None:
    """
    Full demo to check class is working.

    Loads and validates the Greek Strongs XML with the converter class,
        displays a summary, and (if the export command line option is given)
        also produces a pickle output file.
    """
    BibleOrgSysGlobals.introduceProgram(__name__, programNameVersion, LAST_MODIFIED_DATE)

    if 1:  # demonstrate the Greek Lexicon converter classes
        vPrint('Normal', debuggingThisModule, "\nDemonstrating the converter classes…")
        vPrint('Quiet', debuggingThisModule, '')

        converter = GreekStrongsFileConverter()
        converter.loadAndValidate()  # Load the XML
        vPrint('Quiet', debuggingThisModule, converter)  # Just print a summary

        wantExport = BibleOrgSysGlobals.commandLineArguments.export
        if wantExport:
            converter.pickle()  # Produce a pickle output file
            vPrint('Quiet', debuggingThisModule, "Other exports aren't written yet!")
def loadBooks(self):
    """
    Load all the books listed in self.bookList (doing the preload first if necessary).

    If more than one process is allowed (and we're not already multiprocessing),
        the books are loaded by a pool of worker processes and then stashed,
        otherwise they're loaded one by one.

    Afterwards, the temporary unzipped folder is deleted
        and the post-load processing is done.
    """
    vPrint('Normal', debuggingThisModule, _("Loading {} from {}…").format(self.name, self.sourceFolder))

    if not self.preloadDone:
        self.preload()

    if not self.bookList:
        logging.critical("GoBible: " + _("No books to load in folder '{}'!").format(self.sourceFolder))
    elif BibleOrgSysGlobals.maxProcesses > 1 and not BibleOrgSysGlobals.alreadyMultiprocessing:
        # Get our subprocesses ready and waiting for work
        if BibleOrgSysGlobals.verbosityLevel > 1:
            vPrint('Quiet', debuggingThisModule,
                   _("Loading {} GoBible books using {} processes…").format(
                       len(self.bookList), BibleOrgSysGlobals.maxProcesses))
            vPrint('Quiet', debuggingThisModule,
                   _(" NOTE: Outputs (including error and warning messages) from loading various books may be interspersed."))
        BibleOrgSysGlobals.alreadyMultiprocessing = True
        with multiprocessing.Pool(processes=BibleOrgSysGlobals.maxProcesses) as pool:  # start worker processes
            loadedBooks = pool.map(self._loadBookMP, self.bookList)  # have the pool do our loads
            assert len(loadedBooks) == len(self.bookList)
            for bBook in loadedBooks:
                bBook.containerBibleObject = self  # Because the pickling and unpickling messes this up
                self.stashBook(bBook)  # Saves them in the correct order
        BibleOrgSysGlobals.alreadyMultiprocessing = False
    else:  # Just single threaded
        # Load the books one by one -- assuming that they have regular Paratext style filenames
        for BBB in self.bookList:
            self.loadBook(BBB)  # also saves it

    # Delete the temporary folder (where .jar was unzipped)
    rmtree(self.unzippedFolderpath)
    self.unzippedFolderpath = None

    self.doPostLoadProcessing()
def fullDemo() -> None:
    """
    Full demo to check class is working.

    Only displays a couple of messages, because the USFM Bible test
        below is currently disabled (by its "if 0").
    """
    global debuggingThisModule
    BibleOrgSysGlobals.introduceProgram( __name__, programNameVersion, LAST_MODIFIED_DATE )

    vPrint( 'Quiet', debuggingThisModule, "Since these are only helper classes, they can't actually do much at all." )
    vPrint( 'Quiet', debuggingThisModule, " Try running USFMBibleBook or USXXMLBibleBook which use these classes." )

    if 0: # Test reading and writing a USFM Bible (with MOST exports -- unless debugging)
        import os
        from BibleOrgSys.Formats.USFMBible import USFMBible

        testData = ( # name, abbreviation, folderpath for USFM files
                ("Matigsalug", 'MBTV', Path( '/mnt/SSDs/Matigsalug/Bible/MBTV/') ),
                ) # You can put your USFM test folder here

        for j, (name, abbrev, testFolder) in enumerate( testData ):
            vPrint( 'Quiet', debuggingThisModule, f"\nInternalBibleInternals B{j+1}/ {abbrev} from {testFolder}…" )
            if not os.access( testFolder, os.R_OK ):
                logger.error( f"Sorry, test folder '{testFolder}' is not readable on this computer." )
                continue

            UB = USFMBible( testFolder, name, abbrev )
            UB.load()
            UB.discover() # Why does this only help if -1 flag is enabled???
            vPrint( 'Quiet', debuggingThisModule, ' ', UB )
            if BibleOrgSysGlobals.strictCheckingFlag:
                UB.check()

            # Make a section index for each book
            for BBB,bookObject in UB.books.items():
                bookObject._SectionIndex = InternalBibleSectionIndex( bookObject )
                bookObject._SectionIndex.makeSectionIndex()
                if BBB=='GEN':
                    halt # Not defined in this function -- NOTE(review): looks like a deliberate stop, confirm
def testForge4SS(F4SSFolder):
    """
    Crudely demonstrate the Forge for SwordSearcher Bible class.

    Loads the Bible from the given folder, displays a summary,
        optionally checks and exports it (depending on the global flags),
        and then displays the text of some sample verses.
    """
    from BibleOrgSys.Reference import VerseReferences

    vPrint('Normal', debuggingThisModule, _("Demonstrating the Forge for SwordSearcher Bible class…"))
    vPrint('Quiet', debuggingThisModule, " Test folder is {!r}".format(F4SSFolder))

    vb = ForgeForSwordSearcherBible(F4SSFolder, "demo")
    vb.load()  # Load and process the file
    vPrint('Normal', debuggingThisModule, vb)  # Just print a summary

    if BibleOrgSysGlobals.strictCheckingFlag:
        vb.check()
        vBErrors = vb.getCheckResults()  # Fetched but not displayed
    if BibleOrgSysGlobals.commandLineArguments.export:
        vb.doAllExports(wantPhotoBible=False, wantODFs=False, wantPDFs=False)

    sampleReferences = (
        ('OT','GEN','1','1'), ('OT','GEN','1','3'), ('OT','PSA','3','0'), ('OT','PSA','3','1'),
        ('OT','DAN','1','21'), ('NT','MAT','3','5'), ('NT','JDE','1','4'), ('NT','REV','22','21'),
        ('DC','BAR','1','1'), ('DC','MA1','1','1'), ('DC','MA2','1','1',),
    )
    for reference in sampleReferences:
        testament, bookCode, chapter, verse = reference
        if (testament == 'OT' and len(vb) == 27  # Don't bother with OT references if it's only a NT
                or testament == 'NT' and len(vb) == 39  # Don't bother with NT references if it's only a OT
                or testament == 'DC' and len(vb) <= 66):  # Don't bother with DC references if it's too small
            continue
        svk = VerseReferences.SimpleVerseKey(bookCode, chapter, verse)
        shortText = svk.getShortText()
        try:
            verseText = vb.getVerseText(svk)
        except KeyError:
            verseText = "Verse not available!"
        vPrint('Normal', debuggingThisModule, reference, shortText, verseText)
def BCVReferenceToInt(self, BCVReferenceTuple) -> int:
    """
    Convert a BCV or BCVS reference to an integer
        especially so that references can be sorted.

    BCVReferenceTuple is either a three-item (BBB, C, V)
        or a four-item (BBB, C, V, S) sequence.
        C and V must be convertible by int().
        S (if given and not empty) must be 'a' or 'b' (in either case).

    The result is built from self.getReferenceNumber( BBB ), which is
        multiplied by 100 before adding C, then by 150 before adding V,
        then by 10 before adding the suffix value (0 for no suffix or 'a', 1 for 'b').

    Raises ValueError if the sequence doesn't have three or four items
        or if C or V can't be converted to an integer,
        and KeyError if S is not a known suffix.
    """
    try:
        BBB, C, V = BCVReferenceTuple
        S = ''
    except ValueError:  # It doesn't have three items, so it should be a BCVS reference
        BBB, C, V, S = BCVReferenceTuple

    result = self.getReferenceNumber(BBB)

    try:
        intC = int(C)
    except ValueError:
        vPrint('Quiet', debuggingThisModule, repr(C))
        raise  # Can't handle this chapter value yet
    result = result * 100 + intC

    try:
        intV = int(V)
    except ValueError:
        vPrint('Quiet', debuggingThisModule, repr(V))
        raise  # Can't handle this verse value yet
    result = result * 150 + intV

    try:
        intS = {'a': 0, 'b': 1}[S.lower()] if S else 0
    except KeyError:  # A failed dictionary lookup gives a KeyError (not a ValueError)
        vPrint('Quiet', debuggingThisModule, repr(S))
        raise  # Can't handle this suffix yet
    result = result * 10 + intS

    return result
def exportDataToJSON(self, filepath=None):
    """
    Writes the information tables to a .json file that can be easily loaded into a Java program.

    See http://en.wikipedia.org/wiki/JSON.

    If no filepath is given, the file is named from self._filenameBase
        (plus '_Tables.json') and placed in the default writeable
        derived datafiles folder (which is created if necessary).

    NOTE: As well as the complete list and the complete dictionary,
        every individual item is also dumped into the same file (temporary
        debugging code), so the file contains several JSON values
        one after the other rather than a single JSON document.
    """
    import json

    assert self._XMLTree
    self.importDataToPython()
    assert self.__DataList
    assert self.__DataDict

    if not filepath:
        folder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH
        # Also creates any missing parent folders, and it's not
        #   an error if the folder has already been created
        os.makedirs(folder, exist_ok=True)
        filepath = os.path.join(folder, self._filenameBase + '_Tables.json')
    vPrint('Normal', debuggingThisModule, _("Exporting to {}…").format(filepath))

    with open(filepath, 'wt', encoding='utf-8') as myFile:
        for something in self.__DataList:  # temp for debugging
            vPrint('Quiet', debuggingThisModule, "Dumping something", something)
            json.dump(something, myFile, indent=2)
        json.dump(self.__DataList, myFile, indent=2)

        for someKey, someItem in self.__DataDict.items():  # temp for debugging
            vPrint('Quiet', debuggingThisModule, "Dumping someKey", someKey)
            json.dump(someKey, myFile, indent=2)
            vPrint('Quiet', debuggingThisModule, "Dumping someItem", someItem)
            json.dump(someItem, myFile, indent=2)
        json.dump(self.__DataDict, myFile, indent=2)
def getBrDrBrEntryHTML(self, key):
    """
    Return an HTML entry for the given BrDrBr key.

    The key is a BrDrBr number (string) like 'ah.ba.aa'.

    The lexicon is loaded first if that hasn't been done yet.
    In the main part of the entry, the sense, w, pos, ref, and def tags
        are converted to spans, and a type/id field (with the attributes
        in either order) is replaced by a bold "Type:" label.
        The second part of the entry is appended (inside braces) in a Status span.

    Returns None if no entry data is found for the key.
    """
    vPrint('Never', debuggingThisModule, _("HebrewLexiconSimple.getBrDrBrEntryHTML( {!r} )").format(key))
    if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag:
        assert key and key.count('.') == 2
    if self.BrownDriverBriggsEntries is None:
        self.load()

    entry = self.getBrDrBrEntryData(key)
    vPrint('Verbose', debuggingThisModule, f" HebrewLexiconSimple.getBrDrBrEntryHTML got entry: {entry}")
    if not entry:
        return None

    # Convert each of these XML tags into a HTML span with the given class
    spanConversions = (
        ('<sense>', '<span class="Sense">', '</sense>'),
        ('<w>', '<span class="HebrewWord">', '</w>'),
        ('<pos>', '<span class="POS">', '</pos>'),
        ('<ref>', '<span class="Ref">', '</ref>'),
        ('<def>', '<span class="Def">', '</def>'),
    )
    mainEntry = entry[0]
    for openingTag, openingSpan, closingTag in spanConversions:
        mainEntry = mainEntry.replace(openingTag, openingSpan).replace(closingTag, '</span>')

    # Replace the type/id field (the attributes can be in either order) -- the id isn't used
    fieldPatterns = (
        ('<type="(.+?)" id="(.+?)">', 1),  # regex, number of the group containing the type
        ('<id="(.+?)" type="(.+?)">', 2),
    )
    for fieldRegex, typeGroupNumber in fieldPatterns:
        match = re.search(fieldRegex, mainEntry)
        if match:
            hType = match.group(typeGroupNumber)
            mainEntry = mainEntry[:match.start()] + '<b>Type:</b> {}<br>'.format(hType) + mainEntry[match.end():]

    html = f'{mainEntry} <span class="Status">{{{entry[1]}}}</span>'
    vPrint('Verbose', debuggingThisModule, f" HebrewLexiconSimple.getBrDrBrEntryHTML about to return: {html}")
    return html
def loadBooks(self):
    """
    Load all the books listed in self.givenBookList (doing the preload first if necessary).

    If more than one process is allowed (and we're not already multiprocessing),
        the books are loaded by a pool of worker processes and then stashed,
        otherwise they're loaded one by one.
        A critical error is logged if there's no books to load.

    Afterwards, the post-load processing is done.
    """
    vPrint('Normal', debuggingThisModule, "Loading {} from {}…".format(self.name, self.sourceFolder))

    if not self.preloadDone:
        self.preload()

    if not self.givenBookList:
        logging.critical("BCVBible: " + _("No books to load in folder '{}'!").format(self.sourceFolder))
    elif BibleOrgSysGlobals.maxProcesses > 1 \
    and not BibleOrgSysGlobals.alreadyMultiprocessing:  # Don't start a pool from inside a pool worker
        # Load all the books as quickly as possible
        if BibleOrgSysGlobals.verbosityLevel > 1:
            vPrint('Quiet', debuggingThisModule,
                   "Loading {} BCV books using {} processes…".format(
                       len(self.givenBookList), BibleOrgSysGlobals.maxProcesses))
            vPrint('Quiet', debuggingThisModule,
                   " NOTE: Outputs (including error and warning messages) from loading various books may be interspersed.")
        BibleOrgSysGlobals.alreadyMultiprocessing = True
        try:
            with multiprocessing.Pool(processes=BibleOrgSysGlobals.maxProcesses) as pool:  # start worker processes
                results = pool.map(self._loadBookMP, self.givenBookList)  # have the pool do our loads
                assert len(results) == len(self.givenBookList)
                for bBook in results:
                    self.stashBook(bBook)  # Saves them in the correct order
        finally:  # Make sure that the flag is cleared again even if the loading failed
            BibleOrgSysGlobals.alreadyMultiprocessing = False
    else:  # Just single threaded
        # Load the books one by one -- assuming that they have regular Paratext style filenames
        for BBB in self.givenBookList:
            self.loadBook(BBB)  # also saves it

    self.doPostLoadProcessing()
def fetchAllBibles(self):
    """
    Download the Bible lists from DCS.

    This can be quite slow.

    If self.onlineVersion is set, self.BibleList is reset to an empty list
        and then, for each of a fixed set of search terms, the
        'repos/search' endpoint is queried page by page (via
        self.getOnlineData) and every returned 'data' entry is appended.
    The results of the different searches are simply concatenated
        (nothing here removes duplicates).

    Returns self.BibleList.
    """
    fnPrint(debuggingThisModule, "DCSBibles.fetchAllBibles()")

    limit = 500 # Documentation says 50, but larger numbers seem to work ok
    vPrint( 'Normal', debuggingThisModule, f"Downloading list of available Bibles from DCS ({limit} at a time)…" )

    if self.onlineVersion: # Get a list of available data sets
        self.BibleList = []
        # Does a case-insensitive search
        for searchText in ('ULT', 'UST', 'Bible', 'ULB', 'UDB'): # 7,227 if these are all included!!!
            pageNumber = 1 # Paging restarts for each search term
            while True:
                if BibleOrgSysGlobals.verbosityLevel > 0:
                    vPrint('Quiet', debuggingThisModule, f" Getting '{searchText}' page {pageNumber}…")
                resultDict = self.getOnlineData( f'repos/search?q={searchText}&page={pageNumber}&limit={limit}' )
                #dPrint( 'Quiet', debuggingThisModule, f" Result {type(resultDict)} {len(resultDict)} = {resultDict}" )
                # resultDict should be a dict of length 2 with keys 'ok'(=True) and 'data'
                # NOTE: Anything else (including a failed request) stops the whole download with an AssertionError
                assert resultDict and isinstance( resultDict, dict) and len(resultDict) == 2 \
                                and resultDict['ok']==True
                if not resultDict['data']: break # no more data
                if BibleOrgSysGlobals.verbosityLevel > 1:
                    vPrint('Quiet', debuggingThisModule, f" Got {len(resultDict['data'])} entries")
                self.BibleList.extend(resultDict['data'])
                # A short page only ends the search from page 2 onwards
                #   -- after a non-empty page 1, a second request is always made
                if pageNumber > 1 \
                and len(resultDict['data']) < limit: # must be finished
                    break
                pageNumber += 1
        dPrint('Quiet', debuggingThisModule, " BibleList", len(self.BibleList), self.BibleList)
    # NOTE(review): if self.onlineVersion is not set, self.BibleList is returned untouched
    #   -- confirm that it's initialised elsewhere
    return self.BibleList
def noisyRegExDeleteAll( text:str, this:str ) -> str:
    """
    Delete every match of the regular expression 'this' from 'text',
        reporting what is being done.

    Parameters:
        text: the string to search
        this: the regular expression (pattern string) whose matches are deleted

    Returns the text with all matches removed,
        or the original text unchanged if there were no matches.

    Logs a critical message if the regex still matches the result
        (i.e., if the deletions created new matches).

    Raises re.error if 'this' is not a valid regular expression.
    """
    regex = re.compile( this )

    count1 = len( regex.findall( text ) )
    if count1 == 0:
        vPrint( 'Info', debuggingThisModule, _("No occurrences of regex {!r} found to delete").format( this ) )
        return text
    vPrint( 'Normal', debuggingThisModule, _("Deleting {:,} occurrence{} of regex {!r}").format( count1, '' if count1==1 else 's', this ) )

    newText, count2 = regex.subn( '', text )
    # Only worth mentioning if the deletion count differs from what was announced above
    if count2!=count1 and BibleOrgSysGlobals.verbosityLevel > 2:
        # The singular form applies when exactly ONE occurrence was deleted
        vPrint( 'Quiet', debuggingThisModule, " " + _("Deleted {:,} occurrence{} of regex {!r}").format( count2, '' if count2==1 else 's', this ) )

    # Deleting text can bring the surrounding text together to form new matches
    count3 = len( regex.findall( newText ) )
    if count3: # and BibleOrgSysGlobals.verbosityLevel > 0:
        logging.critical( " " + _("NOTE: {:,} occurrence{} of regex {!r} still remaining!").format( count3, '' if count3==1 else 's', this ) )
    return newText
def loadMetadata(self, metadataFilepath) -> None:
    """
    Load the manifest at the given filepath into the supplied metadata.

    Remembers the filepath in self.metadataFilepath, and stores the result
        of loadYAML() in self.suppliedMetadata['uW']['Manifest']
        (creating the containing dictionaries if necessary).

    If the loaded manifest is not empty, self.applySuppliedMetadata('uW')
        is then called to copy some of it to self.settingsDict.
    """
    vPrint('Never', debuggingThisModule, _("Loading metadata from {!r}").format(metadataFilepath))
    self.metadataFilepath = metadataFilepath

    # Make sure that the nested containers exist before loading anything
    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}
    uWMetadata = self.suppliedMetadata.setdefault('uW', {})

    manifest = loadYAML(metadataFilepath)
    uWMetadata['Manifest'] = manifest
    vPrint('Never', debuggingThisModule, f"\ns.sM: {self.suppliedMetadata}")

    if not manifest:
        return
    self.applySuppliedMetadata('uW')  # Copy some to self.settingsDict
    vPrint('Never', debuggingThisModule, f"\ns.sD: {self.settingsDict}")
def noisyReplaceAll( text:str, this:str, that:str, loop:bool=False ) -> str:
    """
    Replace every occurrence of 'this' in 'text' with 'that',
        reporting what is being done.

    Parameters:
        text: the string to search
        this: the substring to be replaced
        that: the replacement string
        loop: if True, keep replacing until no occurrence of 'this' remains
            (for when a replacement can itself form a new occurrence).

    If 'that' contains 'this' (which includes 'this' being empty),
        looping could never finish, so only a single pass is made
        even if loop is True.

    Returns the adjusted text,
        or the original text unchanged if there were no occurrences.
    """
    count = text.count( this )
    if count == 0:
        vPrint( 'Info', debuggingThisModule, _("No occurrences of {!r} found to replace").format( this ) )
        return text
    vPrint( 'Normal', debuggingThisModule, _("Replacing {:,} occurrence{} of {!r} with {!r}").format( count, '' if count==1 else 's', this, that ) )

    # There's at least one occurrence, so the first pass is always needed
    newText = text.replace( this, that )
    # Each pass would reinsert 'this' if it's inside 'that', so don't loop forever in that case
    if loop and this not in that:
        while this in newText:
            newText = newText.replace( this, that )

    count2 = newText.count( this )
    if count2 and BibleOrgSysGlobals.verbosityLevel > 0:
        vPrint( 'Quiet', debuggingThisModule, " " + _("NOTE: {:,} occurrence{} of {!r} still remaining!").format( count2, '' if count2==1 else 's', this ) )
    return newText
def getBBBFromText(self, bookNameOrAbbreviation: str) -> Optional[str]:
    """
    Look up the referenceAbbreviation for the given book name or abbreviation.

    The text is converted to upper case and looked up in this system's
        sorted book names dictionary.
    If it isn't found there, the result of the same named function
        from the loaded BibleBooksCodes is returned instead.

    Returns None (after logging a critical message)
        if this system has no book names dictionary at all.
    """
    vPrint( 'Never', debuggingThisModule, "BibleBooksNamesSystem.getBBBFromText( {} )".format( bookNameOrAbbreviation))
    if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag:
        assert bookNameOrAbbreviation

    upperCaseText = bookNameOrAbbreviation.upper()
    try:
        namesDict = self.__sortedBookNamesDict
        if upperCaseText in namesDict:
            return namesDict[upperCaseText]
    except AttributeError:
        logging.critical( "No bookname dictionary in {} BibleBooksNamesSystem".format( self.__systemName))
        return None

    if BibleOrgSysGlobals.debugFlag:
        # No match, so show the known names with the same first letter and the same length
        vPrint( 'Quiet', debuggingThisModule, "BibleBooksNamesSystem.getBBBFromText( {} ) {}".format( repr(bookNameOrAbbreviation), upperCaseText))
        wantedLength = len(upperCaseText)
        possibilities = [
            knownName for knownName in namesDict
            if knownName.startswith(upperCaseText[0]) and len(knownName) == wantedLength
        ]
        vPrint('Quiet', debuggingThisModule, "Possibility list is", possibilities)

    # Fall back to the general book codes lookup
    return BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromText( bookNameOrAbbreviation)
def getStrongsEntryHTML(self, key: str) -> Optional[str]:
    """
    Build the HTML rendering of a Greek Strong's entry.

    The key is a Greek Strong's number (string) like 'G1979'.
        The leading 'G' and any leading zeroes are dropped
        before looking up the entry.

    Loads the lexicon first if that hasn't been done yet.

    The result is a "GreekWord" span (with the number as its title)
        containing the first two fields of the entry's 'word' data,
        followed by the entry text (if any) with its StrongsRef and def
        elements converted to HTML spans.

    Returns the HTML string, or None if the key is not found.
    """
    fnPrint(debuggingThisModule, f"GreekLexicon.getStrongsEntryHTML( {key} )")
    if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
        assert key and key[0] == 'G' and key[1:].isdigit()

    keyDigits = key[1:].lstrip('0')  # Remove leading zeroes
    if self.StrongsEntries is None:
        self.load()
    if keyDigits not in self.StrongsEntries:
        return None

    entry = self.StrongsEntries[keyDigits]
    vPrint('Verbose', debuggingThisModule, f" GreekLexicon.getStrongsEntryHTML got entry: {entry}")

    if 'Entry' in entry:
        # Lexicon markup -> HTML, applied in this fixed order
        entryText = entry['Entry']
        for lexiconTag, htmlTag in (
                ('<StrongsRef>', '<span class="StrongsRef">'),
                ('</StrongsRef>', '</span>'),
                ('<def>', '<span class="def">'),
                ('</def>', '</span>'),
        ):
            entryText = entryText.replace(lexiconTag, htmlTag)
        wordEntry = '{}'.format(entryText)
    else:
        wordEntry = ''
    vPrint( 'Verbose', debuggingThisModule, f" GreekLexicon.getStrongsEntryHTML created wordEntry: {wordEntry}" )

    html = f'<span class="GreekWord" title="{keyDigits}" xml:lang="grk">{entry["word"][0]} ({entry["word"][1]})</span> {wordEntry}'
    vPrint( 'Verbose', debuggingThisModule, f" GreekLexicon.getStrongsEntryHTML about to return: {html}")
    return html
def fullDemo() -> None:
    """
    Full demo to check class is working

    Works through three sections against a hard-coded test folder:
        1/ runs UnboundBibleFileCheck on the whole folder and on one
            subfolder, each with no options, autoLoad, and autoLoadBooks
        2/ runs testUB on a chosen tuple of specified module names
        3/ runs testUB on every subfolder discovered in the test folder
            (with a multiprocessing pool if BibleOrgSysGlobals.maxProcesses > 1).
    """
    BibleOrgSysGlobals.introduceProgram(__name__, programNameVersion, LAST_MODIFIED_DATE)

    # NOTE: The demo expects the modules at this fixed location
    BiblesPath = Path('/mnt/SSDs/Bibles/')
    testFolder = os.path.join(BiblesPath, 'Biola Unbound modules/')

    if 1: # demo the file checking code -- first with the whole folder and then with only one folder
        resultA1 = UnboundBibleFileCheck(testFolder)
        vPrint('Normal', debuggingThisModule, "Unbound TestA1", resultA1)
        resultA2 = UnboundBibleFileCheck(testFolder, autoLoad=True)
        vPrint('Normal', debuggingThisModule, "Unbound TestA2", resultA2)
        resultA3 = UnboundBibleFileCheck(testFolder, autoLoadBooks=True)
        vPrint('Normal', debuggingThisModule, "Unbound TestA3", resultA3)

        testSubfolder = os.path.join(testFolder, 'asv/')
        resultB1 = UnboundBibleFileCheck(testSubfolder)
        vPrint('Normal', debuggingThisModule, "Unbound TestB1", resultB1)
        resultB2 = UnboundBibleFileCheck(testSubfolder, autoLoad=True)
        vPrint('Normal', debuggingThisModule, "Unbound TestB2", resultB2)
        resultB3 = UnboundBibleFileCheck(testSubfolder, autoLoadBooks=True)
        vPrint('Normal', debuggingThisModule, "Unbound TestB3", resultB3)

    if 1: # specified modules
        # Only the tuple named in the enumerate() call below is actually used
        single = ("kjv_apocrypha", )
        good = ( "afrikaans_1953", "albanian", "aleppo", "amharic", "arabic_svd", "armenian_eastern", \
                "armenian_western_1853", "asv", "basic_english", "danish", "darby", "douay_rheims", "dutch_svv", \
                "esperanto", "estonian", "kjv_apocrypha", "korean", "manx_gaelic", "maori", "myanmar_judson_1835", \
                "norwegian", "peshitta", "portuguese", "potawatomi", "romani", )
        nonEnglish = ()
        bad = ()
        for j, testFilename in enumerate( single ): # Choose one of the above: single, good, nonEnglish, bad
            vPrint('Normal', debuggingThisModule, "\nUnbound C{}/ Trying {}".format(j + 1, testFilename))
            #myTestFolder = os.path.join( testFolder, testFilename+'/' )
            #testFilepath = os.path.join( testFolder, testFilename+'/', testFilename+'_utf8.txt' )
            # NOTE(review): testUB is defined elsewhere -- presumably it loads the named module; confirm
            testUB(testFilename)

    if 1: # all discovered modules in the test folder
        # Sort the folder contents into subfolders and plain files (only the subfolders get used below)
        foundFolders, foundFiles = [], []
        for something in os.listdir(testFolder):
            somepath = os.path.join(testFolder, something)
            if os.path.isdir(somepath): foundFolders.append(something)
            elif os.path.isfile(somepath): foundFiles.append(something)

        if BibleOrgSysGlobals.maxProcesses > 1: # Get our subprocesses ready and waiting for work
            vPrint( 'Normal', debuggingThisModule, "\nTrying all {} discovered modules…".format( len(foundFolders)))
            parameters = [folderName for folderName in sorted(foundFolders)]
            # This global flag is set for the duration of the pool run
            BibleOrgSysGlobals.alreadyMultiprocessing = True
            with multiprocessing.Pool(processes=BibleOrgSysGlobals.maxProcesses ) as pool: # start worker processes
                results = pool.map(testUB, parameters) # have the pool do our loads
                assert len(results) == len( parameters ) # Results (all None) are actually irrelevant to us here
            BibleOrgSysGlobals.alreadyMultiprocessing = False
        else: # Just single threaded
            for j, someFolder in enumerate(sorted(foundFolders)):
                vPrint('Normal', debuggingThisModule, "\nUnbound D{}/ Trying {}".format(j + 1, someFolder))
                #myTestFolder = os.path.join( testFolder, someFolder+'/' )
                testUB(someFolder)