def processAllRecords(bibDataFile):
    """Process and OCR every record of a bibliography file in random order.

    The bibliography is loaded with ``functions.loadBib``; each record is
    handed to ``functions.processBibRecord`` and then to ``ocrPublication``
    together with the language reported by ``functions.identifyLanguage``
    (called with ``"eng"`` as its second argument).  Once all records are
    done, ``functions.memexStatusUpdates`` is called for the .pdf, .bib,
    .png and .json extensions.

    Args:
        bibDataFile: Path of the bibliography file passed to
            ``functions.loadBib``.
    """
    records = functions.loadBib(bibDataFile)

    # Visit the records in a random order.
    shuffledKeys = list(records.keys())
    random.shuffle(shuffledKeys)

    memexPath = settings["path_to_memex"]

    for recordKey in shuffledKeys:
        record = records[recordKey]
        functions.processBibRecord(memexPath, record)

        citeKey = record["rCite"]
        # NOTE(review): identifyLanguage receives the cite key here, not the
        # record itself -- confirm this is what the helper expects.
        language = functions.identifyLanguage(citeKey, "eng")
        ocrPublication(citeKey, language)

    # One status update per file type, in the original order.
    for extension in (".pdf", ".bib", ".png", ".json"):
        functions.memexStatusUpdates(memexPath, extension)
def _exceedsPageLimit(bibRecord, maxPages):
    """Return True when *bibRecord* declares a numeric ``pagetotal`` above *maxPages*."""
    if "pagetotal" not in bibRecord:
        return False
    try:
        return int(bibRecord["pagetotal"]) > maxPages
    except (TypeError, ValueError):
        # A page count that is not a plain number is treated as unknown, so
        # the publication is still submitted for OCR.
        return False


def processAllRecordsSTR(pathToMemex):
    """OCR every publication found under *pathToMemex*, in random order.

    Every ``.bib`` file reported by ``functions.dicOfRelevantFiles`` is
    loaded, and its publication is passed to ``ocrPublication`` unless the
    record declares a ``pagetotal`` larger than ``settings["page_limit"]``.
    Records without a usable ``pagetotal`` are still submitted.  The page
    limit is forwarded to ``ocrPublication`` in every case.

    After the loop, ``functions.memexStatusUpdates`` is called for the .pdf,
    .bib, .png and .json extensions.  These calls use
    ``settings["path_to_memex"]``, not the *pathToMemex* argument.

    Args:
        pathToMemex: Root folder searched for ``.bib`` files.
    """
    files = functions.dicOfRelevantFiles(pathToMemex, ".bib")

    # Visit the publications in a random order.
    citeKeys = list(files.keys())
    random.shuffle(citeKeys)

    pageLimit = settings["page_limit"]
    maxPages = int(pageLimit)

    for citeKey in citeKeys:
        print(citeKey)
        bibData = functions.loadBib(files[citeKey])

        # "pagetotal" is a field of the publication's record, which sits
        # under its cite key in the mapping returned by loadBib; testing the
        # outer mapping would never find it.
        bibRecord = bibData[citeKey]
        if _exceedsPageLimit(bibRecord, maxPages):
            continue

        language = functions.identifyLanguage(bibRecord, "eng")
        ocrPublication(citeKey, language, pageLimit)

    for extension in (".pdf", ".bib", ".png", ".json"):
        functions.memexStatusUpdates(settings["path_to_memex"], extension)
def processAllRecords(bibDataFile):
    """Process and OCR every record of a bibliography file in random order.

    Loads the bibliography with ``functions.loadBib``, then for each record
    calls ``functions.processBibRecord`` and ``ocrPublication``.  The OCR
    call receives the record's ``rCite`` value, the language returned by
    ``functions.identifyLanguage`` (called with ``"eng"`` as its second
    argument) and ``settings["page_limit"]`` converted to ``int``.
    Afterwards ``functions.memexStatusUpdates`` is called for the .pdf,
    .bib, .png and .json extensions.

    Args:
        bibDataFile: Path of the bibliography file passed to
            ``functions.loadBib``.
    """
    allRecords = functions.loadBib(bibDataFile)

    # Shuffle the keys so the records are handled in a random order.
    recordKeys = list(allRecords.keys())
    random.shuffle(recordKeys)

    for recordKey in recordKeys:
        currentRecord = allRecords[recordKey]
        functions.processBibRecord(settings["path_to_memex"], currentRecord)

        # NOTE(review): identifyLanguage is given the cite key rather than
        # the record -- confirm against the helper's signature.
        recordLanguage = functions.identifyLanguage(
            currentRecord["rCite"], "eng")
        pageLimit = int(settings["page_limit"])
        ocrPublication(currentRecord["rCite"], recordLanguage, pageLimit)

    # Refresh the memex status once per file type, in the original order.
    for fileExtension in (".pdf", ".bib", ".png", ".json"):
        functions.memexStatusUpdates(settings["path_to_memex"], fileExtension)
prevPage = "" #there is no previous page elif k == "0001": #if the page is the first page of the record nextPage = "0002.html" #the next page is the second page of the record prevPage = "DETAILS.html" #the previous page is the Details page elif o == len(orderedPages)-1: #if the page is the last page of the record nextPage = "" #there is no next page prevPage = orderedPages[o-1] + ".html" #the previous page is the penultimate page of the record else: #for all other pages nextPage = orderedPages[o+1] + ".html" #the next page is the page behind in the record prevPage = orderedPages[o-1] + ".html" #the previous page is the page before in the record pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage) #replace the wildcard with a link to the page assigned in the lines before pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage) #replace the Previouspagehtml item with a link to the page assigned in the lines before pagePath = os.path.join(pathToBibFile.replace(citeKey+".bib", ""), "pages", "%s.html" % k) #create a filepath to each page in the pages-folder of each publication with open(pagePath, "w", encoding="utf8") as f9: f9.write(pageTemp) #create and save each page in that pages folder

###########################################################
# FUNCTIONS TESTING #######################################
###########################################################

# Status update for the .html files, run before the interfaces are generated.
functions.memexStatusUpdates(settings["path_to_memex"], ".html")


def processAllRecords(pathToMemex):
    """Generate the publication interface for each eligible .bib file.

    A publication is eligible when a file with the same path but a
    ``.json`` extension exists next to its ``.bib`` file (presumably the OCR
    output -- confirm).  Publications without that file are skipped.

    Args:
        pathToMemex: Root folder searched for ``.bib`` files.
    """
    bibFiles = functions.dicOfRelevantFiles(pathToMemex, ".bib")
    for citeKey, bibPath in bibFiles.items():
        jsonPath = bibPath.replace(".bib", ".json")
        if not os.path.exists(jsonPath):
            continue
        generatePublicationInterface(citeKey, bibPath)


processAllRecords(settings["path_to_memex"])

# Run the next script of the pipeline inside the current namespace.
# NOTE: the file object opened here is never closed explicitly.
exec(open("6_Interface_IndexPage.py").read())
prevPage = "" elif k == "0001": nextPage = "0002.html" prevPage = "DETAILS.html" elif o == len(orderedPages)-1: nextPage = "" prevPage = orderedPages[o-1] + ".html" else: nextPage = orderedPages[o+1] + ".html" prevPage = orderedPages[o-1] + ".html" pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage) pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage) pagePath = os.path.join(pathToBibFile.replace(citeKey+".bib", ""), "pages", "%s.html" % k) with open(pagePath, "w", encoding="utf8") as f9: f9.write(pageTemp) ########################################################### # FUNCTIONS TESTING ####################################### ########################################################### functions.memexStatusUpdates(settings["path_to_memex"], ".html") def processAllRecords(pathToMemex): files = functions.dicOfRelevantFiles(pathToMemex, ".bib") for citeKey, pathToBibFile in files.items(): if os.path.exists(pathToBibFile.replace(".bib", ".json")): generatePublicationInterface(citeKey, pathToBibFile) processAllRecords(settings["path_to_memex"]) exec(open("9_Interface_IndexPage.py").read())
else: nextPage = orderedPages[o + 1] + ".html" prevPage = orderedPages[o - 1] + ".html" pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage) pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage) pagePath = os.path.join( pathToBibFile.replace(citeKey + ".bib", ""), "pages", "%s.html" % k) with open(pagePath, "w", encoding="utf8") as f9: f9.write(pageTemp) ########################################################### # PROCESS ALL RECORDS ##################################### ########################################################### functions.memexStatusUpdates(memexPath, ".html") def processAllRecords(): relDic = functions.dicOfRelevantFiles(memexPath, "bib") for k, v in relDic.items(): generatePublicationInterface(k[:-1], v) processAllRecords() exec(open("7_IndexPage.py").read())