Пример #1
0
def generateContentPage():
    """Draft of the contents-page generator (work in progress).

    NOTE(review): the body reads ``pNums``, ``pathToBibFile``, ``citeKey``,
    ``pageDic``, ``settings`` and ``functions``, none of which are parameters
    or locals of this function - presumably they were meant to become
    arguments or module-level names; confirm before calling. In the visible
    code the filled template is neither written to disk nor returned.
    """

    # Pseudocode / plan:
    #   function:
    #   - open the template
    #   - fill @PATHTOPUBL@, [@CITEKEY@], @AUTHOR@ (@DATE@) @TITLE@
    #     plus a link to the details(.html) page of the text
    #   - keep these variables in a dictionary
    #   - loop over one citekey/text at a time
    #   - input: citekey, path to the file, etc. -> output: list

    # Planned steps, not implemented yet:
    # load individual bib record
    # bibFile = pathToBibFile
    #bibDic = functions.loadBib(bibFile)
    #bibForHTML = functions.prettifyBib(bibDic[citeKey]["complete"])

    # Planned placeholder replacements:
    #pageTemp = template
    #pageTemp = pageTemp.replace("@PATHTOPUBL@", v)
    #pageTemp = pageTemp.replace("@CITEKEY@", v)
    #pageTemp = pageTemp.replace("@AUTHOR@", v)
    #pageTemp = pageTemp.replace("@DATE@", v)
    #pageTemp = pageTemp.replace("@TITLE@", v)

    detailfileDic = functions.generatePageLinks(pNums)
    # load page template - we need template_index.html here
    with open(settings["template_index"], "r", encoding="utf8") as ft:
        template = ft.read()

    # alternative kept for reference: load the per-page template instead
    #with open(settings["template_page"], "r", encoding="utf8") as ft:
    #template = ft.read()

    # load individual bib record
    bibFile = pathToBibFile
    bibDic = functions.loadBib(bibFile)
    bibForHTML = functions.prettifyBib(bibDic[citeKey]["complete"])

    orderedPages = list(detailfileDic.keys())

    for o in range(0, len(orderedPages)):
        #print(o)
        k = orderedPages[o]
        # NOTE(review): the keys come from detailfileDic but the value is
        # looked up in pageDic, which is not defined here - confirm which
        # dictionary is intended.
        v = pageDic[orderedPages[o]]

        # fill the template placeholders for this entry
        pageTemp = template
        pageTemp = pageTemp.replace("@PAGELINKS@", v)
        pageTemp = pageTemp.replace("@PATHTOFILE@", "")
        pageTemp = pageTemp.replace("@CITATIONKEY@", citeKey)
Пример #2
0
def generateContents (...):
        # Draft: generate a list of publications with links and join it with
        # the index page template.
        #
        # NOTE(review): unfinished. `(...)` is a placeholder, not a valid
        # parameter list, and the body reads pNums, pathToBibFile, citeKey and
        # pageDic, which are not defined in this block - presumably they were
        # meant to become parameters; confirm.
        #
        # Each entry should look like this:
        # <li><a href="@PATHTOPUBL@/pages/DETAILS.html">[@CITEKEY@]</a> @AUTHOR@ (@DATE@) - <i>@TITLE@</i></li>
        #
        # Plan:
        #   - load the bib file to get all the values
        #   - load the index template file
        #   - write the <li> etc. into the content
        #   - replace @author with author etc.
        #   - save the result as contents.html
        #
        # Pseudocode:
        #   function:
        #   - open the template
        #   - fill @PATHTOPUBL@, [@CITEKEY@], @AUTHOR@ (@DATE@) @TITLE@
        #     plus a link to the details(.html) page of the text
        #   - keep these variables in a dictionary
        #   - loop over one citekey/text at a time
        #   - input: citekey, path to the file, etc. -> output: list

        # Planned steps, not implemented yet:
        # load individual bib record
        # bibFile = pathToBibFile
        #bibDic = functions.loadBib(bibFile)
        #bibForHTML = functions.prettifyBib(bibDic[citeKey]["complete"])

        # Planned placeholder replacements:
        #   pageTemp = template
        #   pageTemp = pageTemp.replace("@PATHTOPUBL@", v)
        #   pageTemp = pageTemp.replace("@CITEKEY@", v)
        #   pageTemp = pageTemp.replace("@AUTHOR@", v)
        #   pageTemp = pageTemp.replace("@DATE@", v)
        #   pageTemp = pageTemp.replace("@TITLE@", v)
        detailfileDic = functions.generatePageLinks(pNums)
        # load page template - we need template_index.html here
        with open(settings["template_index"], "r", encoding="utf8") as ft:
            template = ft.read()
        # alternative kept for reference: load the per-page template instead
        #with open(settings["template_page"], "r", encoding="utf8") as ft:
            #template = ft.read()
        # load individual bib record
        bibFile = pathToBibFile
        bibDic = functions.loadBib(bibFile)
        bibForHTML = functions.prettifyBib(bibDic[citeKey]["complete"])
        orderedPages = list(detailfileDic.keys())
        for o in range(0, len(orderedPages)):
            #print(o)
            k = orderedPages[o]
            # NOTE(review): the keys come from detailfileDic but the value is
            # looked up in pageDic, which is not defined here - confirm which
            # dictionary is intended.
            v = pageDic[orderedPages[o]]
            # fill the template placeholders for this entry
            pageTemp = template
            pageTemp = pageTemp.replace("@PAGELINKS@", v)
            pageTemp = pageTemp.replace("@PATHTOFILE@", "")
            pageTemp = pageTemp.replace("@CITATIONKEY@", citeKey)
def generatePublicationInterface(citeKey, pathToBibFile):
    """Generate the browsable HTML pages of one publication.

    One page is written for every key returned by
    ``functions.generatePageLinks``: the ``DETAILS`` page shows the
    prettified BibTeX record and a word cloud image, every other page shows
    the page scan (``<page>.png``) and its OCRed text.

    Args:
        citeKey: Citation key of the publication; it indexes the loaded bib
            dictionary and fills ``@CITATIONKEY@`` in the template.
        pathToBibFile: Path to the publication's ``.bib`` file. The OCR
            results are read from the same path with ``.json`` instead of
            ``.bib``; the pages are written to the ``pages`` folder under
            that path with ``<citeKey>.bib`` removed.

    The template path comes from ``settings["template_page"]``; ``settings``
    and ``functions`` are expected to be available at module level.
    """
    print("=" * 80)
    print(citeKey)

    # Parse the OCR results and release the file handle right away.
    jsonFile = pathToBibFile.replace(".bib", ".json")
    with open(jsonFile, encoding="utf8") as jsonData:
        ocred = json.load(jsonData)

    pageDic = functions.generatePageLinks(ocred.keys())

    # load page template
    with open(settings["template_page"], "r", encoding="utf8") as ft:
        template = ft.read()

    # load individual bib record
    bibDic = functions.loadBib(pathToBibFile)
    bibForHTML = functions.prettifyBib(bibDic[citeKey]["complete"])

    orderedPages = list(pageDic.keys())
    lastIndex = len(orderedPages) - 1

    for o, k in enumerate(orderedPages):
        pageTemp = template
        pageTemp = pageTemp.replace("@PAGELINKS@", pageDic[k])
        pageTemp = pageTemp.replace("@PATHTOFILE@", "")
        pageTemp = pageTemp.replace("@CITATIONKEY@", citeKey)

        if k != "DETAILS":
            # regular page: scan image plus its OCRed text
            mainElement = '<img src="@PAGEFILE@" width="100%" alt="">'.replace(
                "@PAGEFILE@", "%s.png" % k)
            pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)
            pageTemp = pageTemp.replace("@OCREDCONTENT@",
                                        ocred[k].replace("\n", "<br>"))
        else:
            # details page: bib record plus word cloud, no OCR text
            mainElement = bibForHTML.replace("\n", "<br> ")
            mainElement = '<div class="bib">%s</div>' % mainElement
            mainElement += '\n<img src="wordcloud.jpg" width="100%" alt="wordcloud">'
            pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)
            pageTemp = pageTemp.replace("@OCREDCONTENT@", "")

        # @NEXTPAGEHTML@ and @PREVIOUSPAGEHTML@
        # The hard-coded targets are only used when that page really exists,
        # so short publications do not get a dead "next" link; an empty
        # string means "no link", as for the previous page of DETAILS.
        if k == "DETAILS":
            nextPage = "0001.html" if "0001" in pageDic else ""
            prevPage = ""
        elif k == "0001":
            nextPage = "0002.html" if "0002" in pageDic else ""
            prevPage = "DETAILS.html"
        elif o == lastIndex:
            nextPage = ""
            prevPage = orderedPages[o - 1] + ".html"
        else:
            nextPage = orderedPages[o + 1] + ".html"
            prevPage = orderedPages[o - 1] + ".html"

        pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage)
        pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage)

        pagePath = os.path.join(
            pathToBibFile.replace(citeKey + ".bib", ""), "pages",
            "%s.html" % k)
        with open(pagePath, "w", encoding="utf8") as f9:
            f9.write(pageTemp)
def generatePublicationInterface(citeKey, pathToBibFile):
    """Generate the browsable HTML pages of one publication.

    One page is written for every key returned by
    ``functions.generatePageLinks``. Regular pages show the page scan, the
    table of connected pages and the OCRed text; the ``DETAILS`` page shows
    the reference, the BibTeX record, the word cloud and the table of
    connected publications.

    Args:
        citeKey: Citation key of the publication.
        pathToBibFile: Path to the publication's ``.bib`` file. The OCR
            results are read from the same path with ``.json`` instead of
            ``.bib``; the pages are written to the ``pages`` folder under
            that path with ``<citeKey>.bib`` removed.

    Besides ``settings`` and ``functions`` the body relies on names that are
    expected at module level: ``bibText``, ``roundUp``, ``pageConnData``,
    ``publConnData``, ``connectionsTemplate``, ``ocrTemplate``,
    ``generalTemplate`` and ``generateReferenceSimple``.
    """
    print("="*80)
    print(citeKey) #print the citeKey of the publication

    # the OCR results live next to the bib file, with a .json extension
    jsonFile = pathToBibFile.replace(".bib", ".json")
    with open(jsonFile, encoding="utf8") as jsonData:
        ocred = json.load(jsonData) #page number -> OCRed text

    # navigation links for every page of the publication
    pageDic = functions.generatePageLinks(ocred.keys())

    # load page template
    with open(settings["template_page"], "r", encoding="utf8") as ft:
        template = ft.read()

    # load individual bib record and make it more readable
    bibDic = functions.loadBib(pathToBibFile)
    bibForHTML = bibText.prettifyBib(bibDic[citeKey]["complete"])

    orderedPages = list(pageDic.keys()) #all page names, in link order
    lastIndex = len(orderedPages) - 1

    # table row shown when no similarity data is available
    emptyResults = '<tr><td><i>%s</i></td><td><i>%s</i></td><td><i>%s</i></td></tr>'
    noDataRow = emptyResults % ("no data", "no data", "no data")

    for o, k in enumerate(orderedPages): #loop through the pages
        pageTemp = template
        pageTemp = pageTemp.replace("@PAGELINKS@", pageDic[k]) #links to the other pages
        pageTemp = pageTemp.replace("@PATHTOFILE@", "")
        pageTemp = pageTemp.replace("@CITATIONKEY@", citeKey)

        if k != "DETAILS": #regular page
            mainElement = '<img src="@PAGEFILE@" width="100%" alt="">'.replace("@PAGEFILE@", "%s.png" % k)

            # key of the page cluster this page belongs to
            pageKey = citeKey+"_%05d" % roundUp(int(k), 5)
            if pageKey in pageConnData: #similar page clusters are known
                formattedResults = "\n".join(pageConnData[pageKey])
            else:
                formattedResults = noDataRow

            mainElement += connectionsTemplate.replace("@CONNECTEDTEXTSTEMP@", formattedResults)
            mainElement += ocrTemplate.replace("@OCREDCONTENTTEMP@", ocred[k].replace("\n", "<br>"))
            pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)
        else: #details page
            reference = generateReferenceSimple(citeKey)
            mainElement = "<h3>%s</h3>\n\n" % reference #reference as header

            # bibliographical record
            bibElement = '<div class="bib">%s</div>' % bibForHTML.replace("\n", "<br> ")
            bibElement = generalTemplate.replace("@ELEMENTCONTENT@", bibElement)
            bibElement = bibElement.replace("@ELEMENTHEADER@", "BibTeX Bibliographical Record")
            mainElement += bibElement + "\n\n"

            # word cloud image
            wordCloud = '\n<img src="../' + citeKey + '_wCloud.jpg" width="100%" alt="wordcloud">'
            wordCloud = generalTemplate.replace("@ELEMENTCONTENT@", wordCloud)
            wordCloud = wordCloud.replace("@ELEMENTHEADER@", "WordCloud of Keywords (<i>tf-idf</i>)")
            mainElement += wordCloud + "\n\n"

            if citeKey in publConnData: #similar publications are known
                formattedResults = "\n".join(publConnData[citeKey])
            else:
                formattedResults = noDataRow

            mainElement += connectionsTemplate.replace("@CONNECTEDTEXTSTEMP@", formattedResults)

            pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)

        # @NEXTPAGEHTML@ and @PREVIOUSPAGEHTML@
        # The hard-coded targets are only used when that page really exists,
        # so short publications do not get a dead "next" link; an empty
        # string means "no link".
        if k == "DETAILS": #details page: nothing before it
            nextPage = "0001.html" if "0001" in pageDic else ""
            prevPage = ""
        elif k == "0001": #first page: preceded by the details page
            nextPage = "0002.html" if "0002" in pageDic else ""
            prevPage = "DETAILS.html"
        elif o == lastIndex: #last page: nothing after it
            nextPage = ""
            prevPage = orderedPages[o-1] + ".html"
        else: #all other pages
            nextPage = orderedPages[o+1] + ".html"
            prevPage = orderedPages[o-1] + ".html"

        pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage)
        pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage)

        # save the page in the pages folder of the publication
        pagePath = os.path.join(pathToBibFile.replace(citeKey+".bib", ""), "pages", "%s.html" % k)
        with open(pagePath, "w", encoding="utf8") as f9:
            f9.write(pageTemp)
Пример #5
0
def generatePublicationInterface(citeKey, pathToBibFile):
    """Create every HTML page of a single publication.

    Takes the page template from ``settings["template_page"]`` and writes one
    filled copy per key of ``functions.generatePageLinks``: ``DETAILS`` gets
    the prettified bib record and a word cloud image, every other page gets
    its scan (``<page>.png``) and OCRed text.

    Args:
        citeKey: Citation key of the publication.
        pathToBibFile: Path to its ``.bib`` file; the matching ``.json`` file
            (same path, different extension) holds the OCR results, and the
            output goes to the ``pages`` folder under that path with
            ``<citeKey>.bib`` removed.

    ``settings`` and ``functions`` are expected to be available at module
    level.
    """
    print("=" * 80)
    print(citeKey)

    jsonFile = pathToBibFile.replace(".bib", ".json")
    # explicit encoding avoids locale-dependent decoding errors
    with open(jsonFile, encoding="utf8") as jsonData:
        ocred = json.load(jsonData)

    # page name -> navigation links that make the publication browsable
    pageDic = functions.generatePageLinks(ocred.keys())

    # load page template
    with open(settings["template_page"], "r", encoding="utf8") as ft:
        template = ft.read()

    # load the bib record and make it look better for this view
    bibDic = functions.loadBib(pathToBibFile)
    bibForHTML = functions.prettifyBib(bibDic[citeKey]["complete"])

    orderedPages = list(pageDic.keys())
    finalPosition = len(orderedPages) - 1
    outputFolder = os.path.join(pathToBibFile.replace(citeKey + ".bib", ""),
                                "pages")

    # one iteration per generated page
    for o, k in enumerate(orderedPages):
        pageTemp = template.replace("@PAGELINKS@", pageDic[k])
        pageTemp = pageTemp.replace("@PATHTOFILE@", "")
        pageTemp = pageTemp.replace("@CITATIONKEY@", citeKey)

        if k == "DETAILS":
            # the details page differs from the rest: bib record + word cloud
            mainElement = bibForHTML.replace("\n", "<br> ")
            mainElement = '<div class="bib">%s</div>' % mainElement  # class for the style sheet
            mainElement += '\n<img src="wordcloud.jpg" width="100%" alt="wordcloud">'
            ocrContent = ""
        else:
            mainElement = '<img src="@PAGEFILE@" width="100%" alt="">'.replace(
                "@PAGEFILE@", "%s.png" % k)
            ocrContent = ocred[k].replace("\n", "<br>")
        pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)
        pageTemp = pageTemp.replace("@OCREDCONTENT@", ocrContent)

        # @NEXTPAGEHTML@ and @PREVIOUSPAGEHTML@
        # Hard-coded targets are used only if that page exists, so a short
        # publication does not end up with a dead "next" link; an empty
        # string means "no link".
        if k == "DETAILS":
            nextPage = "0001.html" if "0001" in pageDic else ""
            prevPage = ""
        elif k == "0001":
            nextPage = "0002.html" if "0002" in pageDic else ""
            prevPage = "DETAILS.html"
        elif o == finalPosition:
            nextPage = ""
            prevPage = orderedPages[o - 1] + ".html"
        else:
            nextPage = orderedPages[o + 1] + ".html"
            prevPage = orderedPages[o - 1] + ".html"

        pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage)
        pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage)

        # save the finished page
        pagePath = os.path.join(outputFolder, "%s.html" % k)
        with open(pagePath, "w", encoding="utf8") as f9:
            f9.write(pageTemp)
Пример #6
0
def generatePublicationInterface(citeKey, pathToBibFile):
    """Generate the HTML pages of one publication, including connections.

    One page is written for every key returned by
    ``functions.generatePageLinks``. Regular pages contain the page scan, a
    table built from ``pageConnData`` and the OCRed text; the ``DETAILS``
    page contains the reference, the BibTeX record, the word cloud and a
    table built from ``publConnData``. When no connection data exists for a
    key, a single "no data" row is shown instead.

    Args:
        citeKey: Citation key of the publication.
        pathToBibFile: Path to the publication's ``.bib`` file. The OCR
            results are read from the same path with ``.json`` instead of
            ``.bib``; the pages are written to the ``pages`` folder under
            that path with ``<citeKey>.bib`` removed.

    Besides ``settings`` and ``functions`` the body relies on names that are
    expected at module level: ``roundUp``, ``pageConnData``,
    ``publConnData``, ``connectionsTemplate``, ``ocrTemplate``,
    ``generalTemplate`` and ``generateReferenceSimple``.
    """
    print("="*80)
    print(citeKey)

    jsonFile = pathToBibFile.replace(".bib", ".json")
    with open(jsonFile, "r", encoding="utf8") as jsonData:
        ocred = json.load(jsonData)

    pageDic = functions.generatePageLinks(ocred.keys())

    # load page template
    with open(settings["template_page"], "r", encoding="utf8") as ft:
        template = ft.read()

    # load individual bib record
    bibDic = functions.loadBib(pathToBibFile)
    bibForHTML = functions.prettifyBib(bibDic[citeKey]["complete"])

    orderedPages = list(pageDic.keys())
    lastIndex = len(orderedPages) - 1

    # The fallback row does not depend on the page, so build it once.
    emptyResults = '<tr><td><i>%s</i></td><td><i>%s</i></td><td><i>%s</i></td></tr>'
    noDataRow = emptyResults % ("no data", "no data", "no data")

    for o, k in enumerate(orderedPages):
        pageTemp = template
        pageTemp = pageTemp.replace("@PAGELINKS@", pageDic[k])
        pageTemp = pageTemp.replace("@PATHTOFILE@", "")
        pageTemp = pageTemp.replace("@CITATIONKEY@", citeKey)

        if k != "DETAILS":
            mainElement = '<img src="@PAGEFILE@" width="100%" alt="">'.replace("@PAGEFILE@", "%s.png" % k)

            # connection data is keyed by cite key plus rounded page number
            pageKey = citeKey+"_%05d" % roundUp(int(k), 5)
            if pageKey in pageConnData:
                formattedResults = "\n".join(pageConnData[pageKey])
            else:
                formattedResults = noDataRow

            mainElement += connectionsTemplate.replace("@CONNECTEDTEXTSTEMP@", formattedResults)
            mainElement += ocrTemplate.replace("@OCREDCONTENTTEMP@", ocred[k].replace("\n", "<br>"))
        else:
            reference = generateReferenceSimple(citeKey)
            mainElement = "<h3>%s</h3>\n\n" % reference

            bibElement = '<div class="bib">%s</div>' % bibForHTML.replace("\n", "<br> ")
            bibElement = generalTemplate.replace("@ELEMENTCONTENT@", bibElement)
            bibElement = bibElement.replace("@ELEMENTHEADER@", "BibTeX Bibliographical Record")
            mainElement += bibElement + "\n\n"

            wordCloud = '\n<img src="../' + citeKey + '_wCloud.jpg" width="100%" alt="wordcloud">'
            wordCloud = generalTemplate.replace("@ELEMENTCONTENT@", wordCloud)
            wordCloud = wordCloud.replace("@ELEMENTHEADER@", "WordCloud of Keywords (<i>tf-idf</i>)")
            mainElement += wordCloud + "\n\n"

            if citeKey in publConnData:
                formattedResults = "\n".join(publConnData[citeKey])
            else:
                formattedResults = noDataRow

            mainElement += connectionsTemplate.replace("@CONNECTEDTEXTSTEMP@", formattedResults)

        pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)

        # @NEXTPAGEHTML@ and @PREVIOUSPAGEHTML@
        # The hard-coded targets are only used when that page really exists,
        # so short publications do not get a dead "next" link; an empty
        # string means "no link".
        if k == "DETAILS":
            nextPage = "0001.html" if "0001" in pageDic else ""
            prevPage = ""
        elif k == "0001":
            nextPage = "0002.html" if "0002" in pageDic else ""
            prevPage = "DETAILS.html"
        elif o == lastIndex:
            nextPage = ""
            prevPage = orderedPages[o-1] + ".html"
        else:
            nextPage = orderedPages[o+1] + ".html"
            prevPage = orderedPages[o-1] + ".html"

        pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage)
        pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage)

        pagePath = os.path.join(pathToBibFile.replace(citeKey+".bib", ""), "pages", "%s.html" % k)
        with open(pagePath, "w", encoding="utf8") as f9:
            f9.write(pageTemp)
Пример #7
0
def generatePublicationInterface(citeKey, pathToBibFile):
    """Write the HTML pages that make one publication browsable.

    For every key returned by ``functions.generatePageLinks`` the page
    template (``settings["template_page"]``) is filled and saved: ``DETAILS``
    shows the prettified bib record and a word cloud image, all other pages
    show the scan (``<page>.png``) with its OCRed text.

    Args:
        citeKey: Citation key of the publication.
        pathToBibFile: Path to its ``.bib`` file. The OCR results come from
            the same path with ``.json`` instead of ``.bib``; pages are
            saved to the ``pages`` folder under that path with
            ``<citeKey>.bib`` removed.

    ``settings`` and ``functions`` are expected to be available at module
    level.
    """
    print("=" * 80)
    print(citeKey)

    jsonFile = pathToBibFile.replace(".bib", ".json")
    # Explicit UTF-8, like every other file opened here; without it the
    # locale default is used and non-ASCII OCR text may fail to decode.
    with open(jsonFile, encoding="utf8") as jsonData:
        ocred = json.load(jsonData)

    pageDic = functions.generatePageLinks(ocred.keys())

    # load page template
    with open(settings["template_page"], "r", encoding="utf8") as ft:
        template = ft.read()

    # load individual bib record
    bibDic = functions.loadBib(pathToBibFile)  # loads the bib data
    bibForHTML = functions.prettifyBib(
        bibDic[citeKey]["complete"])  # tidies the record for display

    orderedPages = list(pageDic.keys())
    lastIndex = len(orderedPages) - 1

    for o, k in enumerate(orderedPages):  # loop to create pages
        pageTemp = template
        pageTemp = pageTemp.replace("@PAGELINKS@", pageDic[k])
        pageTemp = pageTemp.replace("@PATHTOFILE@", "")
        pageTemp = pageTemp.replace("@CITATIONKEY@", citeKey)

        if k != "DETAILS":  # regular pages
            mainElement = '<img src="@PAGEFILE@" width="100%" alt="">'.replace(
                "@PAGEFILE@", "%s.png" % k)
            pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)
            pageTemp = pageTemp.replace("@OCREDCONTENT@",
                                        ocred[k].replace("\n", "<br>"))
        else:  # the details page
            mainElement = bibForHTML.replace("\n", "<br> ")
            mainElement = '<div class="bib">%s</div>' % mainElement
            mainElement += '\n<img src="wordcloud.jpg" width="100%" alt="wordcloud">'
            pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)
            pageTemp = pageTemp.replace("@OCREDCONTENT@", "")

        # @NEXTPAGEHTML@ and @PREVIOUSPAGEHTML@
        # Hard-coded targets are used only if that page exists, so a short
        # publication does not end up with a dead "next" link; an empty
        # string means "no link".
        if k == "DETAILS":
            nextPage = "0001.html" if "0001" in pageDic else ""
            prevPage = ""
        elif k == "0001":
            nextPage = "0002.html" if "0002" in pageDic else ""
            prevPage = "DETAILS.html"
        elif o == lastIndex:
            nextPage = ""
            prevPage = orderedPages[o - 1] + ".html"
        else:
            nextPage = orderedPages[o + 1] + ".html"
            prevPage = orderedPages[o - 1] + ".html"

        pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage)
        pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage)

        # save the page
        pagePath = os.path.join(
            pathToBibFile.replace(citeKey + ".bib", ""), "pages",
            "%s.html" % k)
        with open(pagePath, "w", encoding="utf8") as f9:
            f9.write(pageTemp)
Пример #8
0
def generatePublicationInterface(citeKey, pathToBibFile):
    """Generate the pages of one publication plus the index and content pages.

    Three outputs are written:

    1. One HTML page per key of ``functions.generatePageLinks`` in the
       publication's ``pages`` folder (``DETAILS`` shows the bib record and a
       word cloud image, the others show the scan and its OCRed text).
    2. The index page at ``settings["path_to_index"]``: the index template
       with ``@MAINCONTENT@`` replaced by the content of
       ``settings["content_index"]``.
    3. The content page at ``settings["path_to_content"]``: the index page
       with ``@CITEKEY@`` and ``@PATHTOPUBL@`` filled for this publication.

    Args:
        citeKey: Citation key of the publication.
        pathToBibFile: Path to its ``.bib`` file; the OCR results are read
            from the same path with ``.json`` instead of ``.bib``.

    ``settings`` and ``functions`` are expected to be available at module
    level.
    """
    print("=" * 80)
    print(citeKey)

    ######################
    #SINGLE PUBLICATION#
    ######################
    jsonFile = pathToBibFile.replace(".bib", ".json")
    # Explicit UTF-8, like every other file opened here; without it the
    # locale default is used and non-ASCII OCR text may fail to decode.
    with open(jsonFile, encoding="utf8") as jsonData:
        ocred = json.load(jsonData)

    pageDic = functions.generatePageLinks(ocred.keys())

    # load page template
    with open(settings["template_page"], "r", encoding="utf8") as ft:
        template = ft.read()

    # load individual bib record
    bibDic = functions.loadBib(pathToBibFile)
    bibForHTML = functions.prettifyBib(bibDic[citeKey]["complete"])

    orderedPages = list(pageDic.keys())
    lastIndex = len(orderedPages) - 1

    # loop through the pages of the publication
    for o, k in enumerate(orderedPages):
        pageTemp = template
        pageTemp = pageTemp.replace("@PAGELINKS@", pageDic[k])
        pageTemp = pageTemp.replace("@PATHTOFILE@", "")
        pageTemp = pageTemp.replace("@CITATIONKEY@", citeKey)

        if k != "DETAILS":
            mainElement = '<img src="@PAGEFILE@" width="100%" alt="">'.replace(
                "@PAGEFILE@", "%s.png" % k)
            pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)
            pageTemp = pageTemp.replace("@OCREDCONTENT@",
                                        ocred[k].replace("\n", "<br>"))
        else:
            mainElement = bibForHTML.replace("\n", "<br> ")
            mainElement = '<div class="bib">%s</div>' % mainElement
            mainElement += '\n<img src="wordcloud.jpg" width="100%" alt="wordcloud">'
            pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)
            pageTemp = pageTemp.replace("@OCREDCONTENT@", "")

        # @NEXTPAGEHTML@ and @PREVIOUSPAGEHTML@
        # Hard-coded targets are used only if that page exists, so a short
        # publication does not end up with a dead "next" link; an empty
        # string means "no link".
        if k == "DETAILS":
            nextPage = "0001.html" if "0001" in pageDic else ""
            prevPage = ""
        elif k == "0001":
            nextPage = "0002.html" if "0002" in pageDic else ""
            prevPage = "DETAILS.html"
        elif o == lastIndex:
            nextPage = ""
            prevPage = orderedPages[o - 1] + ".html"
        else:
            nextPage = orderedPages[o + 1] + ".html"
            prevPage = orderedPages[o - 1] + ".html"

        pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage)
        pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage)

        pagePath = os.path.join(
            pathToBibFile.replace(citeKey + ".bib", ""), "pages",
            "%s.html" % k)
        with open(pagePath, "w", encoding="utf8") as f9:
            f9.write(pageTemp)

    ######################
    #INDEX#
    ######################
    # load templates for index page
    with open(settings["template_index"], "r", encoding="utf8") as ti:
        template_i = ti.read()

    with open(settings["content_index"], "r", encoding="utf8") as ci:
        content_i = ci.read()

    # use the content file to create the index content by replacing the
    # string @MAINCONTENT@
    pageTemp_i = template_i.replace("@MAINCONTENT@", content_i)

    # set path for index page of Memex
    pagePath_i = settings["path_to_index"]
    with open(pagePath_i, "w", encoding="utf8") as f1:
        f1.write(pageTemp_i)

    ######################
    #CONTENT#
    #hint: <li><a href="@PATHTOPUBL@/pages/DETAILS.html">[@CITEKEY@]</a> @AUTHOR@ (@DATE@) - <i>@TITLE@</i></li>
    ######################
    # What exists so far provides only one link in the side navigation, to
    # the detail page of one publication.

    print(pathToBibFile)
    # strip the bib file name and the "/_data" segment from the path
    detailPage = pathToBibFile.replace("/" + citeKey + ".bib", "")
    detailPage = detailPage.replace("/_data", "")
    print(detailPage)

    # Chain the replacements: restarting from pageTemp_i for the second one
    # would throw away the @CITEKEY@ substitution.
    pageTemp_c = pageTemp_i.replace("@CITEKEY@", citeKey)
    pageTemp_c = pageTemp_c.replace("@PATHTOPUBL@", detailPage)

    # set path for content page of Memex
    pagePath_c = settings["path_to_content"]
    with open(pagePath_c, "w", encoding="utf8") as f2:
        f2.write(pageTemp_c)
Пример #9
0
def generatePublicationInterface(citeKey, pathToBibFile):
    """Generate the HTML pages of one publication with its bib metadata.

    One page is written for every key returned by
    ``functions.generatePageLinks``. Every page gets the word cloud image,
    author (or editor), year and title of the publication; the ``DETAILS``
    page additionally shows the prettified bib record, all other pages show
    the scan (``<page>.png``) and its OCRed text.

    Args:
        citeKey: Citation key of the publication.
        pathToBibFile: Path to its ``.bib`` file. The OCR results are read
            from the same path with ``.json`` instead of ``.bib``; pages are
            written to the ``pages`` folder under that path with
            ``<citeKey>.bib`` removed.

    ``settings`` and ``functions`` are expected to be available at module
    level.
    """
    print("=" * 80)
    print(citeKey)

    ######################
    #SINGLE PUBLICATION#
    ######################
    jsonFile = pathToBibFile.replace(".bib", ".json")  #get JSON file
    # Explicit UTF-8, like every other file opened here; without it the
    # locale default is used and non-ASCII OCR text may fail to decode.
    with open(jsonFile, encoding="utf8") as jsonData:
        ocred = json.load(jsonData)

    pageDic = functions.generatePageLinks(ocred.keys())

    # load page template
    with open(settings["template_page"], "r", encoding="utf8") as ft:
        template = ft.read()

    # load individual bib record
    bibDic = functions.loadBib(pathToBibFile)
    print(bibDic)  # NOTE(review): looks like a leftover debug dump - confirm
    record = bibDic[citeKey]
    bibForHTML = functions.prettifyBib(record["complete"])

    orderedPages = list(pageDic.keys())  #make list of pages
    lastIndex = len(orderedPages) - 1

    # Publication-level values are the same on every page: compute them once.
    # An author takes precedence over an editor.
    authorOrEditor = "[No data]"
    if "editor" in record:
        authorOrEditor = record["editor"]
    if "author" in record:
        authorOrEditor = record["author"]

    date = "unidentified"
    if "year" in record:
        date = record["year"]

    # A record without a title gets the same placeholder as a missing author
    # instead of aborting the whole publication with a KeyError.
    title = "[No data]"
    if "title" in record:
        title = record["title"].replace("{", "").replace("}", "")

    wCloud = '\n<img src="../@WCLOUD@" width="100%" alt="wordcloud">'.replace(
        "@WCLOUD@", "%s.jpg" % citeKey)

    # loop through the pages of the publication
    for o, k in enumerate(orderedPages):
        pageTemp = template
        pageTemp = pageTemp.replace("@PAGELINKS@", pageDic[k])
        pageTemp = pageTemp.replace("@PATHTOFILE@", "")
        pageTemp = pageTemp.replace("@CITATIONKEY@", citeKey)
        pageTemp = pageTemp.replace("@WORD_CLOUD@", wCloud)
        pageTemp = pageTemp.replace("@PUB_AUTHOR@", authorOrEditor)
        pageTemp = pageTemp.replace("@PUB_YEAR@", date)
        pageTemp = pageTemp.replace("@PUB_TITLE@", title)

        if k != "DETAILS":  #regular page: scan plus OCRed text
            mainElement = '<img src="@PAGEFILE@" width="100%" alt="">'.replace(
                "@PAGEFILE@", "%s.png" % k)
            pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)
            pageTemp = pageTemp.replace("@OCREDCONTENT@",
                                        ocred[k].replace("\n", "<br>"))
        else:  #overview page: bib record only
            mainElement = bibForHTML.replace("\n", "<br> ")
            mainElement = '<div class="bib">%s</div>' % mainElement
            pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)
            pageTemp = pageTemp.replace("@OCREDCONTENT@", "")

        # @NEXTPAGEHTML@ and @PREVIOUSPAGEHTML@
        # Hard-coded targets are used only if that page exists, so a short
        # publication does not end up with a dead "next" link; an empty
        # string means "no link".
        if k == "DETAILS":
            nextPage = "0001.html" if "0001" in pageDic else ""
            prevPage = ""  #no previous page
        elif k == "0001":
            nextPage = "0002.html" if "0002" in pageDic else ""
            prevPage = "DETAILS.html"  #DETAILS is not a serial number
        elif o == lastIndex:  #last page: no next page
            nextPage = ""
            prevPage = orderedPages[o - 1] + ".html"
        else:
            nextPage = orderedPages[o + 1] + ".html"
            prevPage = orderedPages[o - 1] + ".html"

        pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage)
        pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage)

        # set path for the individual page and save it
        pagePath = os.path.join(
            pathToBibFile.replace(citeKey + ".bib", ""), "pages",
            "%s.html" % k)
        with open(pagePath, "w", encoding="utf8") as f9:
            f9.write(pageTemp)
Пример #10
0
def generatePublicationInterface(citeKey, pathToBibFile):
    """Generate the HTML pages of a single publication.

    The OCR results are read from the JSON file that sits next to the bib
    file (same path, ``.json`` instead of ``.bib``). The page template named
    by ``settings["template_page"]`` is then filled once for every key
    returned by ``functions.generatePageLinks`` and each result is written
    to ``pages/<key>.html`` inside the publication folder.

    Args:
        citeKey: citation key of the publication; used to look up the record
            in the loaded bibliography and to derive the publication folder
            by stripping ``<citeKey>.bib`` from ``pathToBibFile``.
        pathToBibFile: path to the ``.bib`` file of the publication.

    Returns:
        None. The pages are written to disk.
    """
    print("=" * 80)
    print(citeKey)

    # JSON is UTF-8 by specification; do not rely on the locale default.
    jsonFile = pathToBibFile.replace(".bib", ".json")
    with open(jsonFile, encoding="utf8") as jsonData:
        ocred = json.load(jsonData)

    # For every page key, the HTML that is substituted for @PAGELINKS@.
    pageDic = functions.generatePageLinks(ocred.keys())

    # Loads page template
    with open(settings["template_page"], "r", encoding="utf8") as ft:
        template = ft.read()

    # Loads the bib record and its HTML-ready rendering.
    bibDic = functions.loadBib(pathToBibFile)
    bibForHTML = functions.prettifyBib(bibDic[citeKey]["complete"])

    orderedPages = list(pageDic.keys())
    lastIndex = len(orderedPages) - 1
    pagesDir = os.path.join(
        pathToBibFile.replace(citeKey + ".bib", ""), "pages")

    for o, k in enumerate(orderedPages):
        pageTemp = template
        pageTemp = pageTemp.replace("@PAGELINKS@", pageDic[k])
        pageTemp = pageTemp.replace("@PATHTOFILE@", "")
        pageTemp = pageTemp.replace("@CITATIONKEY@", citeKey)

        if k != "DETAILS":
            # Regular page: the page image plus its OCRed text.
            mainElement = '<img src="@PAGEFILE@" width="100%" alt="">'.replace(
                "@PAGEFILE@", "%s.png" % k)
            ocrContent = ocred[k].replace("\n", "<br>")
        else:
            # Overview page: the bib record and the word cloud, no OCR text.
            mainElement = '<div class="bib">%s</div>' % bibForHTML.replace(
                "\n", "<br> ")
            mainElement += '\n<img src="wordcloud.jpg" width="100%" alt="wordcloud">'
            ocrContent = ""
        pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)
        pageTemp = pageTemp.replace("@OCREDCONTENT@", ocrContent)

        # @NEXTPAGEHTML@ and @PREVIOUSPAGEHTML@
        if k == "DETAILS":
            # Only link forward when the first page really exists.
            nextPage = "0001.html" if "0001" in pageDic else ""
            prevPage = ""
        elif k == "0001":
            # A one-page publication has no second page to link to.
            nextPage = "0002.html" if "0002" in pageDic else ""
            prevPage = "DETAILS.html"
        elif o == lastIndex:
            nextPage = ""
            prevPage = orderedPages[o - 1] + ".html"
        else:
            nextPage = orderedPages[o + 1] + ".html"
            prevPage = orderedPages[o - 1] + ".html"

        pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage)
        pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage)

        # Saves the actual page.
        pagePath = os.path.join(pagesDir, "%s.html" % k)
        with open(pagePath, "w", encoding="utf8") as f9:
            f9.write(pageTemp)
Пример #11
0
def generatePublicationInterface(citeKey, pathToBibFile):
    """Generate the HTML pages of a single publication, with connections.

    The OCR results are read from the JSON file next to the bib file (same
    path, ``.json`` instead of ``.bib``). The template named by
    ``settings["template_page"]`` is filled once for every key returned by
    ``functions.generatePageLinks`` and written to ``pages/<key>.html``
    inside the publication folder.

    Regular pages show the page image, the rows found in ``pageConnData``
    for that page and the OCRed text; the ``DETAILS`` page shows the bib
    record, the word cloud and the rows found in ``publConnData``.

    ``roundUp``, ``pageConnData``, ``publConnData``, ``connectionsTemplate``,
    ``ocrTemplate`` and ``generalTemplate`` are not defined in this function
    and must be available in the enclosing module.

    Args:
        citeKey: citation key of the publication; used to look up the record
            in the loaded bibliography and to derive the publication folder
            by stripping ``<citeKey>.bib`` from ``pathToBibFile``.
        pathToBibFile: path to the ``.bib`` file of the publication.

    Returns:
        None. The pages are written to disk.
    """
    print("=" * 80)
    print(citeKey)

    # JSON is UTF-8 by specification; do not rely on the locale default.
    jsonFile = pathToBibFile.replace(".bib", ".json")
    with open(jsonFile, encoding="utf8") as jsonData:
        ocred = json.load(jsonData)

    # For every page key, the HTML that is substituted for @PAGELINKS@.
    pageDic = functions.generatePageLinks(ocred.keys())

    # load page template
    with open(settings["template_page"], "r", encoding="utf8") as ft:
        template = ft.read()

    # load individual bib record
    bibDic = functions.loadBib(pathToBibFile)
    bibRecord = bibDic[citeKey]
    bibForHTML = functions.prettifyBib(bibRecord["complete"])

    orderedPages = list(pageDic.keys())
    lastIndex = len(orderedPages) - 1

    # The author takes precedence over the editor when both are present.
    if "author" in bibRecord:
        authorOrEditor = bibRecord["author"]
    elif "editor" in bibRecord:
        authorOrEditor = bibRecord["editor"]
    else:
        authorOrEditor = "[No data]"

    date = bibRecord["year"] if "year" in bibRecord else "nodate"

    # Values that are identical for every page of the publication.
    wCloud = '\n<img src="../@WCLOUD@" width="100%" alt="wordcloud">'.replace(
        "@WCLOUD@", "%s.jpg" % citeKey)
    emptyResults = '<tr><td><i>%s</i></td><td><i>%s</i></td><td><i>%s</i></td></tr>'
    noDataRow = emptyResults % ("no data", "no data", "no data")
    pagesDir = os.path.join(
        pathToBibFile.replace(citeKey + ".bib", ""), "pages")

    for o, k in enumerate(orderedPages):
        pageTemp = template
        pageTemp = pageTemp.replace("@PAGELINKS@", pageDic[k])
        pageTemp = pageTemp.replace("@PATHTOFILE@", "")
        pageTemp = pageTemp.replace("@CITATIONKEY@", citeKey)
        pageTemp = pageTemp.replace("@WORD_CLOUD@", wCloud)
        pageTemp = pageTemp.replace("@PUB_AUTHOR@", authorOrEditor)
        pageTemp = pageTemp.replace("@PUB_YEAR@", date)
        pageTemp = pageTemp.replace(
            "@PUB_TITLE@",
            bibRecord["title"].replace("{", "").replace("}", ""))

        if k != "DETAILS":
            # Regular page: image, connected texts, OCRed text.
            mainElement = '<img src="@PAGEFILE@" width="100%" alt="">'.replace(
                "@PAGEFILE@", "%s.png" % k)

            # NOTE(review): roundUp is defined elsewhere; presumably the
            # connection data is keyed by page clusters - confirm.
            pageKey = citeKey + "__%04d" % roundUp(int(k), 4)
            if pageKey in pageConnData:
                print("pagekey found: ", pageKey)
                formattedResults = "\n".join(pageConnData[pageKey])
                print(formattedResults)
            else:
                formattedResults = noDataRow

            mainElement += connectionsTemplate.replace(
                "@CONNECTEDTEXTSTEMP@", formattedResults)
            mainElement += ocrTemplate.replace(
                "@OCREDCONTENTTEMP@", ocred[k].replace("\n", "<br>"))
        else:
            # Overview page: heading, bib record, word cloud, connections.
            reference = "OVERVIEW"
            mainElement = "<h4><font color=#8baad1><center>%s</center></font></h4>\n\n" % reference

            bibElement = '<div class="bib">%s</div>' % bibForHTML.replace(
                "\n", "<br> ")
            bibElement = generalTemplate.replace("@ELEMENTCONTENT@", bibElement)
            bibElement = bibElement.replace(
                "@ELEMENTHEADER@", "BibTeX Bibliographical Record")
            mainElement += bibElement + "\n\n"

            wordCloud = '\n<img src="../' + citeKey + '.jpg" width="100%" alt="wordcloud">'
            wordCloud = generalTemplate.replace("@ELEMENTCONTENT@", wordCloud)
            wordCloud = wordCloud.replace(
                "@ELEMENTHEADER@", "WordCloud of Keywords (<i>tf-idf</i>)")
            mainElement += wordCloud + "\n\n"

            if citeKey in publConnData:
                formattedResults = "\n".join(publConnData[citeKey])
            else:
                formattedResults = noDataRow

            mainElement += connectionsTemplate.replace(
                "@CONNECTEDTEXTSTEMP@", formattedResults)

        pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)

        # @NEXTPAGEHTML@ and @PREVIOUSPAGEHTML@
        if k == "DETAILS":
            # Only link forward when the first page really exists.
            nextPage = "0001.html" if "0001" in pageDic else ""
            prevPage = ""
        elif k == "0001":
            # A one-page publication has no second page to link to.
            nextPage = "0002.html" if "0002" in pageDic else ""
            prevPage = "DETAILS.html"
        elif o == lastIndex:
            nextPage = ""
            prevPage = orderedPages[o - 1] + ".html"
        else:
            nextPage = orderedPages[o + 1] + ".html"
            prevPage = orderedPages[o - 1] + ".html"

        pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage)
        pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage)

        pagePath = os.path.join(pagesDir, "%s.html" % k)
        with open(pagePath, "w", encoding="utf8") as f9:
            f9.write(pageTemp)
Пример #12
0
def generatePublicationInterface(citeKey, pathToBibFile):
    """Generate the HTML pages of a single publication.

    The OCR results are read from the JSON file next to the bib file (same
    path, ``.json`` instead of ``.bib``). The template named by
    ``settings["template_page"]`` is filled once for every key returned by
    ``functions.generatePageLinks`` and written to ``pages/<key>.html``
    inside the publication folder. The ``DETAILS`` key produces the overview
    page (bib record and word cloud); every other key produces a page with
    its image and OCRed text.

    Args:
        citeKey: citation key of the publication; used to look up the record
            in the loaded bibliography and to derive the publication folder
            by stripping ``<citeKey>.bib`` from ``pathToBibFile``.
        pathToBibFile: path to the ``.bib`` file of the publication.

    Returns:
        None. The pages are written to disk.
    """
    print("=" * 80)
    print(citeKey)

    # The OCR results are indexed by page key. JSON is UTF-8 by
    # specification, so the encoding is stated instead of left to the locale.
    jsonFile = pathToBibFile.replace(".bib", ".json")
    with open(jsonFile, encoding="utf8") as jsonData:
        ocred = json.load(jsonData)

    # For every page key, the HTML that is substituted for @PAGELINKS@.
    pageDic = functions.generatePageLinks(ocred.keys())

    # load page template
    with open(settings["template_page"], "r", encoding="utf8") as ft:
        template = ft.read()

    # load individual bib record and render it for the detail page
    bibDic = functions.loadBib(pathToBibFile)
    bibForHTML = functions.prettifyBib(bibDic[citeKey]["complete"])

    orderedPages = list(pageDic.keys())
    lastIndex = len(orderedPages) - 1
    pagesDir = os.path.join(
        pathToBibFile.replace(citeKey + ".bib", ""), "pages")

    for o, k in enumerate(orderedPages):
        isDetails = k == "DETAILS"

        if isDetails:
            # detail page: bib record followed by the word cloud image
            mainElement = '<div class="bib">%s</div>' % bibForHTML.replace(
                "\n", "<br> ")
            mainElement += '\n<img src="wordcloud.jpg" width="100%" alt="wordcloud">'
            ocrContent = ""
        else:
            # normal page: page image and the OCRed text with HTML breaks
            mainElement = '<img src="@PAGEFILE@" width="100%" alt="">'.replace(
                "@PAGEFILE@", "%s.png" % k)
            ocrContent = ocred[k].replace("\n", "<br>")

        # @NEXTPAGEHTML@ and @PREVIOUSPAGEHTML@
        if isDetails:
            # only link forward when the first page really exists
            nextPage = "0001.html" if "0001" in pageDic else ""
            prevPage = ""
        elif k == "0001":
            # a one-page publication has no second page to link to
            nextPage = "0002.html" if "0002" in pageDic else ""
            prevPage = "DETAILS.html"
        elif o == lastIndex:
            nextPage = ""
            prevPage = orderedPages[o - 1] + ".html"
        else:
            nextPage = orderedPages[o + 1] + ".html"
            prevPage = orderedPages[o - 1] + ".html"

        # fill the template; the replacement order matches the placeholders'
        # original processing order
        pageTemp = template
        pageTemp = pageTemp.replace("@PAGELINKS@", pageDic[k])
        pageTemp = pageTemp.replace("@PATHTOFILE@", "")
        pageTemp = pageTemp.replace("@CITATIONKEY@", citeKey)
        pageTemp = pageTemp.replace("@MAINELEMENT@", mainElement)
        pageTemp = pageTemp.replace("@OCREDCONTENT@", ocrContent)
        pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage)
        pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage)

        # create the html page
        pagePath = os.path.join(pagesDir, "%s.html" % k)
        with open(pagePath, "w", encoding="utf8") as f9:
            f9.write(pageTemp)