def demo():
    """
    Exercise the module: announce the program, show an empty Bible object,
        then load one USFM Bible from a test folder and run its exports.
    """
    if Globals.verbosityLevel > 0:
        print("{} V{}".format(ProgName, ProgVersion))

    # The base class can be instantiated, but can't do much by itself
    emptyBible = Bible()
    if Globals.verbosityLevel > 0:
        print(emptyBible)

    # Try a single folder that contains a USFM Bible
    from USFMBible import USFMBible
    bibleName = "Matigsalug"
    fileEncoding = "utf-8"
    sourceFolder = "../../../../../Data/Work/Matigsalug/Bible/MBTV/" # Adjust this to suit your own computer
    if not os.access(sourceFolder, os.R_OK):
        print("Sorry, test folder '{}' is not readable on this computer.".format(sourceFolder))
        return

    usfmBible = USFMBible(sourceFolder, bibleName, fileEncoding)
    usfmBible.load()
    if Globals.verbosityLevel > 0:
        print(usfmBible)
    if Globals.strictCheckingFlag:
        usfmBible.check()
    usfmBible.doAllExports("OutputFiles", wantPhotoBible=False, wantODFs=False, wantPDFs=False)
def demo():
    """
    Exercise the module: announce the program, show an empty Bible object,
        then load one USFM Bible from a test folder and run all its exports.
    """
    if Globals.verbosityLevel > 0:
        print( "{} V{}".format( ProgName, ProgVersion ) )

    # The base class can be instantiated, but can't do much by itself
    baseBible = Bible()
    if Globals.verbosityLevel > 0:
        print( baseBible )

    # Try a single folder that contains a USFM Bible
    from USFMBible import USFMBible
    bibleName = "Matigsalug"
    fileEncoding = "utf-8"
    inputFolder = "../../../../../Data/Work/Matigsalug/Bible/MBTV/" # Adjust this to suit your own computer
    if not os.access( inputFolder, os.R_OK ):
        print( "Sorry, test folder '{}' is not readable on this computer.".format( inputFolder ) )
        return

    loadedBible = USFMBible( inputFolder, bibleName, fileEncoding )
    loadedBible.load()
    if Globals.verbosityLevel > 0:
        print( loadedBible )
    if Globals.strictCheckingFlag:
        loadedBible.check()
    loadedBible.doAllExports( "OutputFiles" )
def createEmptyUSFMBooks( folderPath, currentBBB, requestDict ):
    """
    Write empty USFM books (or chapter/verse shells) into folderPath.

    requestDict selects what gets written, e.g.,
        Books: 'None', 'Current', 'All', 'OT', or 'NT'
        Fill: 'None', 'Basic', 'Versification', or 'Version'
        Versification: e.g., 'KJV' (only read when Fill is 'Versification')
        Version: e.g., 'KJV1611' (only read when Fill is 'Version')

    Each book is written as a UTF-8 file named <USFMNumber>-<USFMAbbreviation>.USFM.
    Books which the versification system doesn't contain, or which the source
        version can't load (FileNotFoundError), are skipped.
    The number of skipped and created books is printed at the end.
    """
    from BibleVersificationSystems import BibleVersificationSystem
    from InternalBible import OT39_BOOKLIST, NT27_BOOKLIST
    from InternalBibleInternals import BOS_ALL_ADDED_MARKERS
    from USFMBible import USFMBible

    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print( exp("createEmptyUSFMBooks( {}, {}, {} )").format( folderPath, currentBBB, requestDict ) )

    fillChoice = requestDict['Fill']

    # A versification system is only needed for the 'Versification' fill
    if fillChoice == 'Versification':
        versificationObject = BibleVersificationSystem( requestDict['Versification'] )
    else:
        versificationObject = None
    print( 'versificationObject', versificationObject )
    if versificationObject is not None:
        getNumChapters = versificationObject.getNumChapters
        getNumVerses = versificationObject.getNumVerses

    # A source Bible is only needed for the 'Version' fill
    if fillChoice == 'Version':
        uB = USFMBible( requestDict['Version'] )
        print( "Fill Bible1", uB )
        uB.preload()
        print( "Fill Bible2", uB )

    # Work out which books were asked for
    booksChoice = requestDict['Books']
    if booksChoice == 'None':
        wantedBooks = []
    elif booksChoice == 'Current':
        wantedBooks = [ currentBBB ]
    elif booksChoice == 'All':
        wantedBooks = OT39_BOOKLIST + NT27_BOOKLIST
    elif booksChoice == 'OT':
        wantedBooks = OT39_BOOKLIST
    elif booksChoice == 'NT':
        wantedBooks = NT27_BOOKLIST
    else:
        halt # programming error

    createdCount = 0
    skippedBooklist = []
    for BBB in wantedBooks:
        if fillChoice == 'Versification' and versificationObject is not None \
        and BBB not in versificationObject:
            skippedBooklist.append( BBB )
            continue

        USFMAbbreviation = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation( BBB )
        USFMNumber = BibleOrgSysGlobals.BibleBooksCodes.getUSFMNumber( BBB )

        if fillChoice == 'None':
            bookText = ''
        elif fillChoice == 'Basic':
            # Just the header lines and the start of chapter one
            bookText = ''.join( (
                '\\id {} Empty book created by {}\n'.format( USFMAbbreviation.upper(), APP_NAME_VERSION ),
                '\\ide UTF-8\n',
                '\\h Bookname\n',
                '\\mt Book Title\n',
                '\\c 1\n',
                ) )
        elif fillChoice == 'Versification':
            bookText = createEmptyUSFMBookText( BBB, getNumChapters, getNumVerses )
        elif fillChoice == 'Version':
            try:
                uB.loadBook( BBB )
            except FileNotFoundError:
                skippedBooklist.append( BBB )
                continue
            sourceBook = uB[BBB] # Get the Bible book object

            # Copy the marker skeleton of the source book, one marker per line,
            #   keeping only chapter and verse numbers (all other text is dropped)
            shellLines = []
            for processedEntry in sourceBook._processedLines:
                pseudoMarker = processedEntry.getMarker()
                cleanText = processedEntry.getCleanText()
                isAddedMarker = '¬' in pseudoMarker \
                                or pseudoMarker in BOS_ALL_ADDED_MARKERS \
                                or pseudoMarker in ('c#','vp#',)
                if isAddedMarker:
                    continue # Added markers aren't wanted in the shell

                if not cleanText:
                    shellLines.append( '\\{}'.format( pseudoMarker ) )
                elif pseudoMarker == 'c':
                    shellLines.append( '\\c {}'.format( cleanText ) )
                elif pseudoMarker == 'v':
                    shellLines.append( '\\v {} '.format( cleanText ) )
                else:
                    shellLines.append( '\\{} '.format( pseudoMarker ) )
            bookText = '\n'.join( shellLines )
        else:
            halt # programming error

        # Save this book
        filename = '{}-{}.USFM'.format( USFMNumber, USFMAbbreviation )
        filepath = os.path.join( folderPath, filename )
        with open( filepath, mode='wt', encoding='utf-8' ) as theFile:
            theFile.write( bookText )
        createdCount += 1

    print( len(skippedBooklist), "books skipped:", skippedBooklist ) # Should warn the user here
    print( createdCount, "books created" )
def demo():
    """
    Demonstrate the comparison functions on a USFM Bible and its back-translation.

    Loads the two test Bibles, then runs whichever of the hand-toggled
        sections below are enabled (by default only the whole-Bible compare).

    If either test folder is unreadable, a message is printed for it and the
        demo returns early, because every later section needs both Bibles.
    """
    from USFMBible import USFMBible

    if BibleOrgSysGlobals.verbosityLevel > 0: print( "{} Demo".format( ProgNameVersion ) )

    def loadTestBible( testFolder, name, encoding ):
        """ Load (and optionally check) one USFM Bible, or return None if its folder isn't readable. """
        if not os.access( testFolder, os.R_OK ):
            print( "Sorry, test folder {!r} is not readable on this computer.".format( testFolder ) )
            return None
        loadedBible = USFMBible( testFolder, name, encoding )
        loadedBible.load()
        if BibleOrgSysGlobals.verbosityLevel > 0: print( loadedBible )
        if BibleOrgSysGlobals.strictCheckingFlag: loadedBible.check()
        #loadedBible.doAllExports( "OutputFiles", wantPhotoBible=False, wantODFs=False, wantPDFs=False )
        return loadedBible

    def printResultLines( BBB, resultList ):
        """ Print one line per result, substituting the real Bible names into the result strings. """
        for (C,V,marker),resultString in resultList:
            resultString = resultString.replace( 'Bible1', name1 ).replace( 'Bible2', name2 )
            print( ' {} {}:{} {} {}'.format( BBB, C, V, marker, resultString ) )

    # Load a USFM Bible and BT
    if BibleOrgSysGlobals.verbosityLevel > 0: print( "\nLoading USFM Bible…" )
    name1, encoding1, testFolder1 = "MBTV", 'utf-8', "../../../../../Data/Work/Matigsalug/Bible/MBTV/" # You can put your test folder here
    name2, encoding2, testFolder2 = "MS-BT", 'utf-8', "../../../../../Data/Work/Matigsalug/Bible/MBTBT/" # You can put your test folder here
    MS_ILLEGAL_STRINGS_1 = ( 'c','f','j','o','q','v','x','z', ) + DEFAULT_ILLEGAL_STRINGS_1
    MS_ILLEGAL_STRINGS_2 = ( 'We ',' we ',' us ',' us.',' us,',' us:',' us;',' us!',' us?',' us–',' us—',
                             'Our ',' our ','You ','you ','you.','you,','you:','you;','you!','you?','you–','you—',
                             'Your ','your ','yours ',' the the ', ) + DEFAULT_ILLEGAL_STRINGS_2

    UB1 = loadTestBible( testFolder1, name1, encoding1 )
    UB2 = loadTestBible( testFolder2, name2, encoding2 )
    if UB1 is None or UB2 is None:
        # Without this guard, the sections below would fail on a Bible that was never loaded
        return

    if 0: # Test one book
        if BibleOrgSysGlobals.verbosityLevel > 0: print( "\nTesting one book only…" )
        BBB = 'JDE'
        result = compareBooksPedantic( UB1[BBB], UB2[BBB], illegalStrings1=MS_ILLEGAL_STRINGS_1, illegalStrings2=MS_ILLEGAL_STRINGS_2 )
        if BibleOrgSysGlobals.verbosityLevel > 0:
            print( "Comparing {} gave:".format( BBB ) )
            print( ' ', result )

    if 1: # Test the whole Bibles
        if BibleOrgSysGlobals.verbosityLevel > 0: print( "\nTesting for whole Bible…" )
        results = compareBibles( UB1, UB2, illegalStrings1=MS_ILLEGAL_STRINGS_1, illegalStrings2=MS_ILLEGAL_STRINGS_2 )
        totalCount = resultsBooksCount = 0
        if BibleOrgSysGlobals.verbosityLevel > 0:
            print( "\nComparing the entire Bibles gave:" )
            for BBB,bookResults in results.items():
                if bookResults: # Only books with at least one result are listed and counted
                    resultsBooksCount += 1
                    totalCount += len( bookResults )
                    print( '\n{} ({} vs {}):'.format( BBB, name1, name2 ) )
                    printResultLines( BBB, bookResults )
            print( "{} total results in {} books (out of {})".format( totalCount, resultsBooksCount, len(UB1) ) )

    if 0: # Compare one book
        if BibleOrgSysGlobals.verbosityLevel > 0: print( "\nAnalyzing one book only…" )
        BBB = 'JDE'
        segmentResult, otherResult = segmentizeBooks( UB1[BBB], UB2[BBB] )
        if BibleOrgSysGlobals.verbosityLevel > 0:
            print( "Comparing {} gave:".format( BBB ) )
            #print( ' 1s', len(segmentResult), segmentResult )
            print( ' 2o', len(otherResult), otherResult )
        dict12, dict21 = loadWordCompares( 'Tests/DataFilesForTests', 'MSBTCheckWords.txt' )
        awResult = analyzeWords( segmentResult, dict12, dict21 )
        if BibleOrgSysGlobals.verbosityLevel > 0:
            print( "Comparing {} gave:".format( BBB ) )
            print( '\n{} ({} vs {}):'.format( BBB, name1, name2 ) )
            printResultLines( BBB, awResult )
            print( "{:,} results in {}".format( len(awResult), BBB ) )

    if 0: # Compare the whole Bibles
        if BibleOrgSysGlobals.verbosityLevel > 0: print( "\nAnalyzing whole Bible…" )
        totalSegments = totalCount = 0
        for BBB in UB1.getBookList():
            segmentResult, otherResult = segmentizeBooks( UB1[BBB], UB2[BBB] )
            totalSegments += len( segmentResult )
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print( "Comparing {} gave:".format( BBB ) )
                #print( ' 1s', len(segmentResult), segmentResult )
                print( ' 2o', len(otherResult), otherResult )
            dict12, dict21 = loadWordCompares( 'Tests/DataFilesForTests', 'MSBTCheckWords.txt' )
            awResult = analyzeWords( segmentResult, dict12, dict21 )
            totalCount += len( awResult )
            if BibleOrgSysGlobals.verbosityLevel > 0:
                print( '\n{} ({} vs {}):'.format( BBB, name1, name2 ) )
                printResultLines( BBB, awResult )
                print( " {:,} results in {}".format( len(awResult), BBB ) )
        if BibleOrgSysGlobals.verbosityLevel > 0:
            print( "{:,} total results in {} books ({:,} segments)".format( totalCount, len(UB1), totalSegments ) )
def loadBookIfNecessary(self, BBB):
    """
    Download the USFM file for the given book (if necessary), then pass
        the request on to USFMBible.loadBookIfNecessary.

    Nothing is downloaded if all the books were already downloaded.
    Otherwise at most one download is attempted per book:
        if a download was already attempted for BBB, this returns immediately
        (without calling the USFMBible loader).

    The downloaded file is saved (as UTF-8) into self.sourceFolder.

    Returns None. If the download fails with an HTTPError or URLError,
        the error is logged as critical and the book isn't loaded.

    TODO: This function doesn't check if the USFM book was downloaded by a previous run
            (and is still up-to-date)
    """
    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print(f"DCSBible.loadBookIfNecessary( {BBB} )…")

    if not self.downloadedAllBooks:
        if self.attemptedDownload.get(BBB):
            if BibleOrgSysGlobals.verbosityLevel > 2 or debuggingThisModule or BibleOrgSysGlobals.debugFlag:
                print(f"{BBB} was already downloaded (or attempted)")
            return
        self.attemptedDownload[BBB] = True # Set before the attempt so a failure isn't retried

        # TODO: Change to .tar.gz instead of zip
        nn = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
        if nn > 39: nn += 1 # DSC uses #41 for MAT (not 39)
        uBBB = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation(BBB).upper()
        USFMfilename = f'{nn:02}-{uBBB}.usfm'
        zipURL = f'{self.baseURL}/raw/branch/master/{USFMfilename}' # Actually a single raw USFM file (not a zip)
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("Downloading {} file from '{}'…".format(BBB, zipURL))
        try:
            HTTPResponseObject = urllib.request.urlopen(zipURL)
        except urllib.error.HTTPError as err: # Must come first as it's a subclass of URLError
            logging.critical("DCS HTTPError '{}' from {}".format(err, zipURL))
            return
        except urllib.error.URLError as err:
            logging.critical("DCS URLError '{}' from {}".format(err, zipURL))
            return

        with HTTPResponseObject: # Make sure that the connection gets closed
            contentType = HTTPResponseObject.info().get('content-type')
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print(" contentType", repr(contentType))
            if contentType == 'text/plain; charset=utf-8':
                downloadedData = HTTPResponseObject.read()
            else:
                print(" contentType", repr(contentType))
                # NOTE(review): halt isn't defined in the visible code, so this presumably
                #   stops here with a NameError -- confirm that that's the intention
                halt # unknown content type

        if BibleOrgSysGlobals.verbosityLevel > 0:
            print(f" Downloaded {len(downloadedData):,} bytes from '{zipURL}'")
        # The encoding must be explicit here, otherwise the UTF-8 text that we just decoded
        #   gets re-encoded with the platform default (which isn't UTF-8 on every system)
        with open(os.path.join(self.sourceFolder, USFMfilename), 'wt', encoding='utf-8') as ourUSFMfile:
            ourUSFMfile.write(downloadedData.decode('utf-8'))

        if not self.preloadDone:
            self.preload()

    USFMBible.loadBookIfNecessary(self, BBB)
def __init__(self, parameterOne, resourcesObject=None, downloadAllBooks=False):
    """
    Create the Door43 cataloged Bible object.

    parameterOne can be:
        a catalog dictionary entry (and second parameter must be None)
        or an index into the BibleList in the resourcesObject passed as the second parameter

    If downloadAllBooks is set, the whole repo is downloaded as a zip file and unzipped
        (unless the local folder already exists and is newer than the 'updated_at' field
        of the resource dictionary).
    Otherwise only the local folder is created here,
        and self.attemptedDownload is set up (empty) for later per-book downloads.

    NOTE: If the download fails with a URLError, the error is logged as critical
        and this returns early, i.e., before USFMBible.__init__ has been called.
    """
    from datetime import timezone # Needed for the UTC-aware timestamp comparison below

    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print(f"DCSBible.__init__( {parameterOne}, {resourcesObject}, {downloadAllBooks} )…")

    if isinstance(parameterOne, dict):
        assert resourcesObject is None
        resourceDict = parameterOne
    else:
        assert isinstance(parameterOne, int)
        assert resourcesObject # why ??? and isinstance( resourcesObject, Door43CatalogResources )
        resourceDict = resourcesObject.getBibleResourceDict(parameterOne)
    assert resourceDict and isinstance(resourceDict, dict)

    self.baseURL = resourceDict['html_url']
    adjustedRepoName = resourceDict['full_name'].replace('/', '--')
    # NOTE(review): The folder name is built from adjustedRepoName as is. A result from
    #   BibleOrgSysGlobals.makeSafeFilename( adjustedRepoName ) used to be calculated here
    #   but was never used -- confirm whether the safe name should be used for the folder
    unzippedFolderPath = os.path.join(BibleOrgSysGlobals.DOWNLOADED_RESOURCES_FOLDER,
                                      'Door43ContentServiceOnline/', f"{adjustedRepoName}/")

    if downloadAllBooks:
        # See if files already exist and are current (so don't download again)
        alreadyDownloadedFlag = False
        if os.path.isdir(unzippedFolderPath):
            # The 'Z' suffix means UTC, so both datetimes are made UTC-aware here
            #   (a naive local folder time would be wrong by the local UTC offset)
            updatedDatetime = datetime.strptime(resourceDict['updated_at'], '%Y-%m-%dT%H:%M:%SZ') \
                                    .replace(tzinfo=timezone.utc)
            folderModifiedDatetime = datetime.fromtimestamp(
                                    os.stat(unzippedFolderPath).st_mtime, tz=timezone.utc)
            alreadyDownloadedFlag = folderModifiedDatetime > updatedDatetime

        if alreadyDownloadedFlag:
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print("Skipping download because folder '{}' already exists.".format(unzippedFolderPath))
        else:
            # Download the zip file (containing all the USFM files, README.md, LICENSE.md, manifest.yaml, etc.)
            # TODO: Change to .tar.gz instead of zip
            zipURL = self.baseURL + '/archive/master.zip' # '/archive/master.tar.gz'
            if BibleOrgSysGlobals.verbosityLevel > 1:
                print("Downloading entire repo from '{}'…".format(zipURL))
            try:
                HTTPResponseObject = urllib.request.urlopen(zipURL)
            except urllib.error.URLError as err:
                logging.critical("DCS URLError '{}' from {}".format(err, zipURL))
                return

            with HTTPResponseObject: # Make sure that the connection gets closed
                contentType = HTTPResponseObject.info().get('content-type')
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    print(" contentType", repr(contentType))
                if contentType == 'application/octet-stream':
                    os.makedirs(unzippedFolderPath, exist_ok=True)
                    downloadedData = HTTPResponseObject.read()
                else:
                    print(" contentType", repr(contentType))
                    # NOTE(review): halt isn't defined in the visible code, so this presumably
                    #   stops here with a NameError -- confirm that that's the intention
                    halt # unknown content type

            if BibleOrgSysGlobals.verbosityLevel > 0:
                print(f" Downloaded {len(downloadedData):,} bytes from '{zipURL}'")
            # Bug in Python up to 3.7 makes this not work for large aligned Bibles (3+ MB)
            #   myTempFile = tempfile.SpooledTemporaryFile()
            # The with statement closes (and so deletes) the temp file even if the unzip fails
            with tempfile.TemporaryFile() as myTempFile:
                myTempFile.write(downloadedData)
                with zipfile.ZipFile(myTempFile) as myzip:
                    # NOTE: Could be a security risk here (the archive comes from the network)
                    myzip.extractall(unzippedFolderPath)
        self.downloadedAllBooks = True

        # There's probably a folder inside this folder
        folders = os.listdir(unzippedFolderPath)
        assert len(folders) == 1 # else maybe a previous download failed -- just manually delete the folder
        desiredFolderName = folders[0] + '/'
        USFMBible.__init__(self, os.path.join(unzippedFolderPath, desiredFolderName),
                           givenName=resourceDict['name'])
    else:
        self.downloadedAllBooks = False
        self.attemptedDownload = {}
        os.makedirs(unzippedFolderPath, exist_ok=True)
        USFMBible.__init__(self, unzippedFolderPath, givenName=resourceDict['name'])
    self.objectNameString = 'DCS USFM Bible object'
def __init__(self, parameterOne, resourcesObject=None):
    """
    Create the Door43 cataloged Bible object.

    parameterOne can be:
        a catalog dictionary entry (and second parameter must be None)
        or an index into the BibleList in the resourcesObject passed as the second parameter

    The zipped USFM files are downloaded and unzipped (unless the local folder already
        exists and is newer than the 'issued' field of the resource dictionary),
        and then the unzipped folder is given to USFMBible.__init__.

    NOTE: If no zipped USFM format is listed for the resource, or if the download fails
        with a URLError, the problem is logged as critical and this returns early,
        i.e., before USFMBible.__init__ has been called.
    """
    from datetime import timezone # Needed for the UTC-aware timestamp comparison below

    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
        print(f"Door43CatalogBible.__init__( {parameterOne}, {resourcesObject} )…")

    if isinstance(parameterOne, dict):
        assert resourcesObject is None
        resourceDict = parameterOne
    else:
        assert isinstance(parameterOne, int)
        assert resourcesObject # why ??? and isinstance( resourcesObject, Door43CatalogResources )
        resourceDict = resourcesObject.getBibleResourceDict(parameterOne)
    assert resourceDict and isinstance(resourceDict, dict)

    # Use get() for the debug display because some resources only have
    #   their formats inside the (single) project entry
    if debuggingThisModule: print('formats', resourceDict.get('formats'))
    if 'formats' in resourceDict:
        formats = resourceDict['formats']
    else:
        assert len(resourceDict['projects']) == 1
        formats = resourceDict['projects'][0]['formats']
    assert formats

    # Find the first format which is a zip of USFM files
    for formatDict in formats:
        formatString = formatDict['format']
        if 'application/zip;' in formatString and 'usfm' in formatString:
            size, zipURL = formatDict['size'], formatDict['url']
            break
    else:
        logging.critical(f"No zip URL found for '{resourceDict['language']}' '{resourceDict['title']}'")
        return

    # See if files already exist and are current (so don't download again)
    alreadyDownloadedFlag = False
    unzippedFolderPath = os.path.join(BibleOrgSysGlobals.DOWNLOADED_RESOURCES_FOLDER,
                                      'Door43Catalog/', f"{resourceDict['language']}_{resourceDict['title']}/")
    if os.path.isdir(unzippedFolderPath):
        # The '+00:00' suffix means UTC, so both datetimes are made UTC-aware here
        #   (a naive local folder time would be wrong by the local UTC offset)
        issuedDatetime = datetime.strptime(resourceDict['issued'], '%Y-%m-%dT%H:%M:%S+00:00') \
                                .replace(tzinfo=timezone.utc)
        folderModifiedDatetime = datetime.fromtimestamp(
                                os.stat(unzippedFolderPath).st_mtime, tz=timezone.utc)
        alreadyDownloadedFlag = folderModifiedDatetime > issuedDatetime

    if alreadyDownloadedFlag:
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("Skipping download because folder '{}' already exists.".format(unzippedFolderPath))
    else:
        # Download the zip file (containing all the USFM files, LICENSE.md, manifest.yaml, etc.)
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print("Downloading {:,} bytes from '{}'…".format(size, zipURL))
        try:
            HTTPResponseObject = urllib.request.urlopen(zipURL)
        except urllib.error.URLError as err:
            logging.critical("Door43 URLError '{}' from {}".format(err, zipURL))
            return None

        with HTTPResponseObject: # Make sure that the connection gets closed
            contentType = HTTPResponseObject.info().get('content-type')
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print(" contentType", contentType)
            if contentType == 'application/zip':
                os.makedirs(unzippedFolderPath, exist_ok=True)
                downloadedData = HTTPResponseObject.read()
            else:
                # NOTE(review): halt isn't defined in the visible code, so this presumably
                #   stops here with a NameError -- confirm that that's the intention
                halt # unknown content type

        # Bug in Python up to 3.7 makes this not work for large aligned Bibles (3+ MB)
        #   myTempFile = tempfile.SpooledTemporaryFile()
        # The with statement closes (and so deletes) the temp file even if the unzip fails
        with tempfile.TemporaryFile() as myTempFile:
            myTempFile.write(downloadedData)
            with zipfile.ZipFile(myTempFile) as myzip:
                # NOTE: Could be a security risk here (the archive comes from the network)
                myzip.extractall(unzippedFolderPath)

    # There's probably a folder inside this folder
    folders = os.listdir(unzippedFolderPath)
    assert len(folders) == 1
    desiredFolderName = folders[0] + '/'
    USFMBible.__init__(self, os.path.join(unzippedFolderPath, desiredFolderName),
                       givenName=resourceDict['title'], givenAbbreviation=resourceDict['identifier'])
    self.objectNameString = 'Door43 USFM Bible object'