Exemplo n.º 1
0
    def preprocess(self, args=''):
        """Run the ScientoPy preprocessing step.

        Reads every ``.csv``/``.txt`` file found in ``args.dataInFolder``,
        merges the documents into one list, writes a CSV preprocessing
        log, optionally removes duplicated documents, saves the merged
        dataset and draws the preprocessing brief graph.

        Args:
            args: options object exposing ``dataInFolder``, ``noRemDupl``,
                ``graphTitle`` and ``savePlot``.  With the default ``''``
                the instance itself (``self``) is used as the options
                holder.

        Returns:
            int: number of documents kept after preprocessing, or ``0``
            on error (Python 2 interpreter, no input documents found, or
            every document omitted by its type).
        """
        if args == '':
            args = self

        # *****************  Program start ****************************
        print("\n\nScientoPy preprocess")
        print("======================\n")

        # Refuse to run under Python 2.
        if sys.version_info[0] < 3:
            print("ERROR, you are using Python 2, Python 3.X.X required")
            print("")
            return 0

        # Create the output folders if they do not exist.
        for folder in (globalVar.DATA_OUT_FOLDER, globalVar.GRAPHS_OUT_FOLDER,
                       globalVar.RESULTS_FOLDER):
            os.makedirs(folder, exist_ok=True)

        # Global counters updated by paperUtils.openFileToDict().
        paperDict = []
        globalVar.loadedPapers = 0
        globalVar.totalPapers = 0
        globalVar.papersScopus = 0
        globalVar.papersWoS = 0
        globalVar.omitedPapers = 0

        # Summary dictionary consumed by graphUtils.grapPreprocess().
        preProcessBrief = {
            "totalLoadedPapers": 0,
            "omittedPapers": 0,
            "papersAfterRemOmitted": 0,
            "loadedPapersScopus": 0,
            "loadedPapersWoS": 0,
            # After-duplication-removal figures.
            "totalAfterRemDupl": 0,
            "removedTotalPapers": 0,
            "removedPapersScopus": 0,
            "removedPapersWoS": 0,
            "papersScopus": 0,
            "papersWoS": 0,
            "percenRemPapersScopus": 0,
            "percenRemPapersWos": 0,
        }

        # Read every CSV/TXT file from the input data folder.  The
        # original code never closed these handles; use `with` instead.
        dataInFolder = os.path.join(args.dataInFolder, '')
        for file in os.listdir(dataInFolder):
            if file.endswith(".csv") or file.endswith(".txt"):
                print("Reading file: %s" % (dataInFolder + file))
                with open(dataInFolder + file, "r",
                          encoding='utf-8') as ifile:
                    paperUtils.openFileToDict(ifile, paperDict)

        # If no documents were found at all, abort.
        if globalVar.loadedPapers == 0:
            print("ERROR: 0 documents found from " + dataInFolder)
            print("")
            return 0

        globalVar.OriginalTotalPapers = len(paperDict)

        # Every loaded document was omitted by its type: abort instead
        # of raising ZeroDivisionError in the percentage lines below.
        if globalVar.OriginalTotalPapers == 0:
            print("ERROR: all the loaded documents were omitted by type")
            print("")
            return 0

        preProcessBrief["totalLoadedPapers"] = globalVar.loadedPapers
        preProcessBrief["omittedPapers"] = globalVar.omitedPapers
        preProcessBrief["papersAfterRemOmitted"] = globalVar.OriginalTotalPapers
        preProcessBrief["loadedPapersScopus"] = globalVar.papersScopus
        preProcessBrief["loadedPapersWoS"] = globalVar.papersWoS

        # Write the preprocessing log in CSV format.
        logPath = os.path.join(globalVar.DATA_OUT_FOLDER,
                               globalVar.PREPROCESS_LOG_FILE)
        with open(logPath, 'w', encoding='utf-8') as logFile:
            fieldnames = (["Info", "Number", "Percentage", "Source"] +
                          globalVar.INCLUDED_TYPES + ["Total"])
            logWriter = csv.DictWriter(logFile,
                                       fieldnames=fieldnames,
                                       dialect=csv.excel,
                                       lineterminator='\n')
            logWriter.writeheader()

            logWriter.writerow({'Info': '***** Original data *****'})
            logWriter.writerow({
                'Info': 'Loaded papers',
                'Number': str(globalVar.loadedPapers)
            })
            logWriter.writerow({
                'Info': 'Omitted papers by document type',
                'Number': ("%d" % globalVar.omitedPapers),
                'Percentage': ("%.1f%%" % (100.0 * globalVar.omitedPapers /
                                           globalVar.loadedPapers))
            })
            logWriter.writerow({
                'Info': 'Total papers after omitted papers removed',
                'Number': str(globalVar.OriginalTotalPapers)
            })
            logWriter.writerow({
                'Info': 'Loaded papers from WoS',
                'Number': ("%d" % globalVar.papersWoS),
                'Percentage': ("%.1f%%" % (100.0 * globalVar.papersWoS /
                                           globalVar.OriginalTotalPapers))
            })
            logWriter.writerow({
                'Info': 'Loaded papers from Scopus',
                'Number': ("%d" % globalVar.papersScopus),
                'Percentage': ("%.1f%%" % (100.0 * globalVar.papersScopus /
                                           globalVar.OriginalTotalPapers))
            })

            print("Loaded papers: %s" % len(paperDict))
            print("Omited papers: %s" % globalVar.omitedPapers)
            print("total papers: %s" % globalVar.OriginalTotalPapers)
            print("WoS papers: %s" % globalVar.papersWoS)
            print("Scopus papers: %s" % globalVar.papersScopus)
            paperUtils.sourcesStatics(paperDict, logWriter)

            if not args.noRemDupl:
                # Remove duplicated documents (updates preProcessBrief too).
                paperDict = paperUtils.removeDuplicates(paperDict, logWriter,
                                                        preProcessBrief)
            else:
                # Duplication removal disabled: copy the pre-removal figures.
                preProcessBrief["totalAfterRemDupl"] = \
                    preProcessBrief["papersAfterRemOmitted"]
                preProcessBrief["removedPapersScopus"] = 0
                preProcessBrief["removedPapersWoS"] = 0
                preProcessBrief["papersScopus"] = \
                    preProcessBrief["loadedPapersScopus"]
                preProcessBrief["papersWoS"] = \
                    preProcessBrief["loadedPapersWoS"]

            # Per-source percentages, guarding the zero-division case.
            if preProcessBrief["totalAfterRemDupl"] > 0:
                percentagePapersWos = (100.0 * preProcessBrief["papersWoS"] /
                                       preProcessBrief["totalAfterRemDupl"])
                percentagePapersScopus = (
                    100.0 * preProcessBrief["papersScopus"] /
                    preProcessBrief["totalAfterRemDupl"])
            else:
                percentagePapersWos = 0
                percentagePapersScopus = 0

            logWriter.writerow({
                'Info': 'Papers from WoS',
                'Number': ("%d" % preProcessBrief["papersWoS"]),
                'Percentage': ("%.1f%%" % percentagePapersWos)
            })
            logWriter.writerow({
                'Info': 'Papers from Scopus',
                'Number': ("%d" % preProcessBrief["papersScopus"]),
                'Percentage': ("%.1f%%" % percentagePapersScopus)
            })

            # Statistics after removing duplicates.
            if not args.noRemDupl:
                logWriter.writerow({'Info': ''})
                logWriter.writerow(
                    {'Info': 'Statics after duplication removal filter'})
                paperUtils.sourcesStatics(paperDict, logWriter)

        # Save the final merged dataset.
        paperSave.saveResults(
            paperDict,
            os.path.join(globalVar.DATA_OUT_FOLDER,
                         globalVar.OUTPUT_FILE_NAME))

        graphUtils.grapPreprocess(plt, preProcessBrief)

        if args.graphTitle:
            plt.title(args.graphTitle)

        plt.tight_layout()

        # Show the graph interactively, or save it when --savePlot is set.
        # (The original code showed the figure a second time when called
        # from the GUI; that redundant call has been removed.)
        if args.savePlot == "":
            # From the GUI the call must not block the event loop.
            plt.show(block=not self.fromGui)
        else:
            plt.savefig(os.path.join(globalVar.GRAPHS_OUT_FOLDER,
                                     args.savePlot),
                        bbox_inches='tight',
                        pad_inches=0.01)
            print("Plot saved on: " +
                  os.path.join(globalVar.GRAPHS_OUT_FOLDER, args.savePlot))

        return len(paperDict)
Exemplo n.º 2
0
def _latexEscape(text):
    """Escape the characters that are special in BibTeX field values."""
    return text.replace("&", r"\&").replace("_", r"\_")


def _bibtexAuthors(paper):
    """Return the paper's authors formatted as a BibTeX ``Author`` field."""
    authorsNames = paper["authorFull"]
    if paper["dataBase"] == "Scopus":
        # Scopus style "Last F., Last F." -> "Last, F. and Last, F."
        names = [x.strip() for x in authorsNames.replace(",", ";").split(";")]
        names = [x.replace(" ", ", ", 1) for x in names]
        authorsNames = " and ".join(names)
    if paper["dataBase"] == "WoS":
        authorsNames = authorsNames.replace("; ", " and ")
    return authorsNames


def _writeCommonBibFields(ofile, paper):
    """Write the trailing fields shared by @Article and @Inproceedings."""
    if paper["pageSart"] and paper["pageEnd"]:
        ofile.write('  Pages \t=\t"%s-%s",\n' %
                    (paper["pageSart"], paper["pageEnd"]))
    if paper["volume"]:
        ofile.write('  Volume \t=\t"%s",\n' % paper["volume"])
    if paper["artNo"]:
        ofile.write('  Article-Number \t=\t"%s",\n' % paper["artNo"])
    ofile.write('  Year \t\t=\t"%s",\n' % paper["year"])
    if paper["issn"]:
        ofile.write('  ISSN \t\t=\t"%s",\n' % paper["issn"])
    if paper["isbn"]:
        ofile.write('  ISBN \t\t=\t"%s",\n' % paper["isbn"])
    if paper["doi"]:
        ofile.write('  DOI \t\t=\t"%s",\n' % paper["doi"])


def generateBibtex(inputLatexFile):
    r"""Generate a BibTeX file for every ``\cite{}`` key in a LaTeX file.

    Extracts the cite keys from the document body of *inputLatexFile*,
    looks each key up (by ``eid``) in the preprocessed ScientoPy
    dataset, and writes one ``@Article``/``@Inproceedings`` entry per
    cited paper to ``globalVar.OUTPUT_FILE_BIB``.

    Args:
        inputLatexFile: path of the LaTeX source to scan for cites.

    Returns:
        str: path of the generated ``.bib`` file.
    """
    import re

    print("\n\nGenerating BibTeX")
    print("=================\n")

    with open(inputLatexFile, 'r') as fileobject:
        rawtext = fileobject.read()

    # Keep only the document body, up to the bibliography itself.
    start = '\\begin{document}'
    end = '\\begin{thebibliography}'
    bodytext = rawtext[rawtext.find(start) + len(start):rawtext.rfind(end)]

    # Extract the cite keys.  A dict (insertion ordered) deduplicates
    # while preserving first appearance; the value flags whether the key
    # was already written to the output .bib file.  The previous manual
    # character scanner could skip cites near the end of the body.
    citesDict = {}
    for match in re.finditer(r'\\cite\{([^}]*)\}', bodytext):
        for key in match.group(1).split(','):
            key = key.replace(' ', '')
            if key not in citesDict:
                citesDict[key] = False

    print("%d cites found." % len(citesDict))

    # Start paper lists empty.
    papersDict = []
    papersToBib = []

    # Open the preprocessed dataset and load it into papersDict.
    INPUT_FILE = os.path.join(globalVar.DATA_OUT_FOLDER,
                              globalVar.OUTPUT_FILE_NAME)
    with open(INPUT_FILE, "r", encoding='utf-8') as ifile:
        print("Reading file: %s" % (INPUT_FILE))
        paperUtils.openFileToDict(ifile, papersDict)
    print("Loaded %d documents" % (len(papersDict)))

    # Select the cited papers, keeping only the first dataset match per key.
    for paper in papersDict:
        if paper["eid"] in citesDict.keys():
            if citesDict[paper["eid"]] == False:
                print("Added paper(%s): %s" % (paper["eid"], paper["title"]))
                papersToBib.append(paper)
                citesDict[paper["eid"]] = True

    OUT_FILE = os.path.join(globalVar.LATEX_EXAMPLE_FOLDER,
                            globalVar.OUTPUT_FILE_BIB)
    with open(OUT_FILE, 'w', encoding='utf-8') as ofile:
        for paper in papersToBib:
            authorsNames = _bibtexAuthors(paper)

            # Preprocess fields: transliterate to ASCII, escape TeX
            # specials, and TeX-quote the title.
            paper["title"] = _latexEscape(unidecode.unidecode(paper["title"]))
            paper["title"] = paper["title"].replace('"', '``',
                                                    1).replace('"', "''")
            paper["sourceTitle"] = _latexEscape(
                unidecode.unidecode(paper["sourceTitle"]))
            for field in ("pageCount", "publisher", "publisherAddress",
                          "conferenceTitle", "conferenceLocation",
                          "conferenceDate"):
                paper[field] = _latexEscape(paper[field])

            docType = paper["documentType"].split(";")[0]

            if docType in ["Article", "Review", "Article in Press"]:
                ofile.write('@Article{%s,\n' % paper["eid"])
                ofile.write('  Author \t=\t"%s",\n' % authorsNames)
                ofile.write('  Title\t\t=\t"%s",\n' % paper["title"])
                ofile.write('  Journal \t=\t"%s",\n' % paper["sourceTitle"])
                if paper["pageCount"]:
                    ofile.write('  Numpages\t=\t"%s",\n' % paper["pageCount"])
                _writeCommonBibFields(ofile, paper)
                ofile.write('}\n\n\n')

            if docType in ["Conference Paper", "Proceedings Paper", ]:
                ofile.write('@Inproceedings{%s,\n' % paper["eid"])
                ofile.write('  Author \t=\t"%s",\n' % authorsNames)
                ofile.write('  Title\t\t=\t"%s",\n' % paper["title"])
                if paper["publisher"]:
                    ofile.write('  Publisher \t=\t"%s",\n' %
                                paper["publisher"])
                # Fixed swapped fields: Address gets the publisher
                # address and Numpages the page count (they were crossed
                # in the original code).
                if paper["publisherAddress"]:
                    ofile.write('  Address\t=\t"%s",\n' %
                                paper["publisherAddress"])
                if paper["conferenceTitle"] and paper[
                        "conferenceLocation"] and paper["conferenceDate"]:
                    ofile.write(
                        '  Note\t\t=\t"In Proceedings of the %s, %s, %s",\n' %
                        (paper["conferenceTitle"],
                         paper["conferenceLocation"],
                         paper["conferenceDate"]))
                elif paper["conferenceTitle"] and paper["conferenceDate"]:
                    ofile.write(
                        '  Note\t\t=\t"In  {Proceedings of the } %s, %s",\n' %
                        (paper["conferenceTitle"], paper["conferenceDate"]))
                if paper["pageCount"]:
                    ofile.write('  Numpages\t=\t"%s",\n' % paper["pageCount"])
                _writeCommonBibFields(ofile, paper)
                ofile.write('}\n\n\n')

        print("\nFinished, total references generated: %d\n" %
              len(papersToBib))

    return OUT_FILE
Exemplo n.º 3
0
        else:
            citesDict[cite] = False

# NOTE(review): fragment of a larger script -- `citesDict` is built by a
# cite-extraction loop that starts above this view (keys are cite keys,
# values flag whether the paper was already selected).
print("%d cites found." % len(citesDict))
print(citesDict)

# Start paper list empty
papersDict = []
papersToBib = []

# Open the storage database and add to papersDict
INPUT_FILE = os.path.join(globalVar.DATA_OUT_FOLDER,
                          globalVar.OUTPUT_FILE_NAME)
ifile = open(INPUT_FILE, "r", encoding='utf-8')
print("Reading file: %s" % (INPUT_FILE))
paperUtils.openFileToDict(ifile, papersDict)
ifile.close()
print("Loaded %d docuemnts" % (len(papersDict)))

# Select the cited papers: keep only the first dataset entry matching
# each cite key (the False->True flag in citesDict deduplicates).
count = 1
for paper in papersDict:
    #print("%d, %s" % (count, paper["title"]))
    #count += 1
    if paper["eid"] in citesDict.keys():
        if citesDict[paper["eid"]] == False:
            print("Added paper(%s): %s" % (paper["eid"], paper["title"]))
            papersToBib.append(paper)
            citesDict[paper["eid"]] = True

# Output .bib path; presumably written to by code below this view.
OUT_FILE = os.path.join(globalVar.RESULTS_FOLDER, globalVar.OUTPUT_FILE_BIB)
Exemplo n.º 4
0
    def scientoPy(self, args=''):
        if args == '':
            args = self

        print("\n\nScientoPy: %s" % (globalVar.SCIENTOPY_VERSION))
        print("================\n")

        # Check python version
        if sys.version_info[0] < 3:
            print("ERROR, you are using Python 2, Python 3.X.X required")
            print("")
            exit()

        # Validate window Width
        if args.windowWidth < 1:
            print("ERROR: minimum windowWidth 1")
            exit()

        # Validate start and end years
        if args.startYear > args.endYear:
            print("ERROR: startYear > endYear")
            exit()

        # Create output folders if not exist
        if not os.path.exists(os.path.join(globalVar.GRAPHS_OUT_FOLDER)):
            os.makedirs(os.path.join(globalVar.GRAPHS_OUT_FOLDER))
        if not os.path.exists(os.path.join(globalVar.RESULTS_FOLDER)):
            os.makedirs(os.path.join(globalVar.RESULTS_FOLDER))

        # Select the input file
        if args.previousResults:
            INPUT_FILE = os.path.join(globalVar.RESULTS_FOLDER,
                                      globalVar.OUTPUT_FILE_NAME)
        else:
            INPUT_FILE = os.path.join(globalVar.DATA_OUT_FOLDER,
                                      globalVar.OUTPUT_FILE_NAME)

        # Start the output list empty
        papersDictOut = []
        topicList = []

        loadDataSet = False

        if len(self.papersDict) == 0 or args.previousResults:
            loadDataSet = True

        if args.previousResults == False and self.lastPreviousResults == True:
            loadDataSet = True

        # Open the dataset only if not loaded in papersDict
        if loadDataSet:
            self.papersDict = []
            self.lastPreviousResults = args.previousResults
            # Open the storage database and add to sel.fpapersDict
            if not os.path.isfile(INPUT_FILE):
                print("ERROR: %s file not found" % INPUT_FILE)
                print(
                    "Make sure that you have run the preprocess step before run scientoPy"
                )
                exit()

            ifile = open(INPUT_FILE, "r", encoding='utf-8')
            print("Reading file: %s" % (INPUT_FILE))
            paperUtils.openFileToDict(ifile, self.papersDict)
            ifile.close()

            print("Scopus papers: %s" % globalVar.papersScopus)
            print("WoS papers: %s" % globalVar.papersWoS)
            print("Omited papers: %s" % globalVar.omitedPapers)
            print("Total papers: %s" % len(self.papersDict))

        # Create a yearArray
        yearArray = range(args.startYear, args.endYear + 1)
        yearPapers = {}
        for i in range(args.startYear, args.endYear + 1):
            yearPapers[i] = 0

        # Filter papers with invalid year
        self.papersDict = list(
            filter(lambda x: x["year"].isdigit(), self.papersDict))
        # Filter the papers outside the year range
        papersDictInside = self.papersDict.copy()
        papersDictInside = list(
            filter(lambda x: int(x["year"]) >= args.startYear,
                   papersDictInside))
        papersDictInside = list(
            filter(lambda x: int(x["year"]) <= args.endYear, papersDictInside))

        print("Total papers in range (%s - %s): %s" %
              (args.startYear, args.endYear, len(papersDictInside)))

        # If no papers in the range exit
        if (len(papersDictInside) == 0):
            print("ERROR: no papers found in the range.")
            del papersDictInside
            return

        # Find the number of total papers per year
        for paper in papersDictInside:
            if int(paper["year"]) in yearPapers.keys():
                yearPapers[int(paper["year"])] += 1

        # Get the filter options
        filterSubTopic = ""
        if args.filter:
            filterSubTopic = args.filter.strip()
            print("Filter Sub Topic: %s" % filterSubTopic)

        # Parse custom topics
        if args.topics:
            print("Custom topics entered:")

            # Divide the topics by ;
            topicsFirst = args.topics.split(";")

            for x in topicsFirst:
                topicList.append(x.split(","))

            # Remove beginning and ending space from topics, and empty topics
            for topic in topicList:
                for idx, item in enumerate(topic):
                    topic[idx] = item.strip()
                    if not topic[idx]:
                        topic.remove(topic[idx])
                if not topic:
                    topicList.remove(topic)

            # Remove for each sub topic, start and end spaces
            for item1 in topicList:
                for item2 in item1:
                    item2 = item2.strip()

            for topic in topicList:
                print(topic)

        # Find the top topics
        else:
            print("Finding the top topics...")

            topicDic = {}

            # For each paper, get the full topicDic
            for paper in papersDictInside:

                # For each item in paper criteria
                for item in paper[args.criterion].split(";"):
                    # Strip paper item and upper case
                    item = item.strip()
                    item = item.upper()

                    # If paper item empty continue
                    if item == "":
                        continue

                    # If filter sub topic, omit items outside that do not match with the subtopic
                    if filterSubTopic != "" and len(item.split(",")) >= 2:
                        if (item.split(",")[1].strip().upper() !=
                                filterSubTopic.upper()):
                            continue

                    # If topic already in topicDic
                    if item in topicDic:
                        topicDic[item] += 1
                    # If topic is not in topicDic, create this in topicDic
                    else:
                        topicDic[item] = 1

                    # If onlyFirst, only keep the firt processesing
                    if args.onlyFirst:
                        break

            # If trending analysis, the top topic list to analyse is bigger
            if args.trend:
                topicListLength = globalVar.TOP_TREND_SIZE
                startList = 0
            else:
                topicListLength = args.length
                startList = args.skipFirst

            # Get the top topics by the topDic count
            topTopcis = sorted(
                topicDic.items(),
                key=lambda x: -x[1])[startList:(startList + topicListLength)]

            # Put the topTopics in topic List
            for topic in topTopcis:
                topicList.append([topic[0]])

            if len(topicList) == 0:
                print(
                    "\nFINISHED : There is not results with your inputs criteria or filter"
                )
                del papersDictInside
                return

        # print("Topic list:")
        # print(topicList)

        # Create a dictonary in topicResults list per element in topicList
        topicResults = []
        for topics in topicList:
            topicItem = {}
            topicItem["upperName"] = topics[0].upper()
            # If the topic name was given as an argument, use the first one given, else keep empty to use the first one found
            if args.topics:
                topicItem["name"] = topics[0]
            else:
                topicItem["name"] = ""
            topicItem["allTopics"] = topics
            topicItem["year"] = yearArray
            topicItem["PapersCount"] = [0] * len(yearArray)
            topicItem["PapersCountAccum"] = [0] * len(yearArray)
            topicItem["PapersCountRate"] = [0] * len(yearArray)
            topicItem["PapersTotal"] = 0
            topicItem["AverageDocPerYear"] = 0  # ADY
            topicItem["PapersInLastYears"] = 0
            topicItem["PerInLastYears"] = 0  # PDLY
            topicItem["CitedByCount"] = [0] * len(yearArray)
            topicItem["CitedByCountAccum"] = [0] * len(yearArray)
            topicItem["CitedByTotal"] = 0
            topicItem["papers"] = []
            topicItem["topicsFound"] = []
            topicItem["hIndex"] = 0
            topicItem["agr"] = 0  # Average growth rate
            topicResults.append(topicItem)

        # Find papers within the arguments, and fill the topicResults fields per year.
        print("Calculating papers sum...")
        # For each paper
        for paper in papersDictInside:
            # For each item in paper criteria
            for item in paper[args.criterion].split(";"):
                # Strip paper item and upper
                item = item.strip()
                itemUp = item.upper()

                # For each topic in topic results
                for topicItem in topicResults:
                    # for each sub topic
                    for subTopic in topicItem["allTopics"]:

                        # Check if the sub topic match with the paper item
                        if args.topics and "*" in subTopic.upper():
                            subTopicRegex = subTopic.upper().replace("*", ".*")
                            p = re.compile(subTopicRegex)
                            match = p.match(itemUp)
                        else:
                            match = subTopic.upper() == itemUp

                        # If match, sum it to the topicItem
                        if match:
                            yearIndex = topicItem["year"].index(
                                int(paper["year"]))
                            topicItem["PapersCount"][yearIndex] += 1
                            topicItem["PapersTotal"] += 1
                            topicItem["CitedByCount"][yearIndex] += int(
                                paper["citedBy"])
                            topicItem["CitedByTotal"] += int(paper["citedBy"])
                            # If no name in the topicItem, put the first one that was found
                            if topicItem["name"] == "":
                                topicItem["name"] = item
                            topicItem["papers"].append(paper)
                            # Add the matched paper to the papersDictOut
                            papersDictOut.append(paper)

                            # If it is a new topic, add it to topicItem["topicsFound"]
                            if itemUp not in [
                                    x.upper() for x in topicItem["topicsFound"]
                            ]:
                                topicItem["topicsFound"].append(item)
                # Only process one (the first one) if args.onlyFirst
                if args.onlyFirst:
                    break

        # Print the topics found if the asterisk willcard was used
        for topicItem in topicResults:
            for subTopic in topicItem["allTopics"]:
                if args.topics and "*" in subTopic.upper():
                    print("\nTopics found for %s:" % subTopic)
                    print('"' + ';'.join(topicItem["topicsFound"]) + '"')
                    print("")

        print("Calculating accumulative ...")
        # Extract accumulative
        for topicItem in topicResults:
            citedAccumValue = 0
            papersAccumValue = 0
            for i in range(0, len(topicItem["CitedByCountAccum"])):
                citedAccumValue += topicItem["CitedByCount"][i]
                topicItem["CitedByCountAccum"][i] = citedAccumValue

                papersAccumValue += topicItem["PapersCount"][i]
                topicItem["PapersCountAccum"][i] = papersAccumValue

        print("Calculating Average Growth Rate (AGR)...")
        # Extract the Average Growth Rate (AGR)
        for topicItem in topicResults:
            # Calculate rates
            pastCount = 0
            # Per year with papers count data
            for i in range(0, len(topicItem["PapersCount"])):
                topicItem["PapersCountRate"][
                    i] = topicItem["PapersCount"][i] - pastCount
                pastCount = topicItem["PapersCount"][i]

            # Calculate AGR from rates
            endYearIndex = len(topicItem["year"]) - 1
            startYearIndex = endYearIndex - (args.windowWidth - 1)

            topicItem["agr"] = \
                round(np.mean(topicItem["PapersCountRate"][startYearIndex: endYearIndex + 1]), 1)

        print("Calculating Average Documents per Year (ADY)...")
        # Extract the Average Documents per Year (ADY)
        for topicItem in topicResults:

            # Calculate ADY from rates
            endYearIndex = len(topicItem["year"]) - 1
            startYearIndex = endYearIndex - (args.windowWidth - 1)

            topicItem["AverageDocPerYear"] = \
                round(np.mean(topicItem["PapersCount"][startYearIndex: endYearIndex + 1]), 1)

            topicItem["PapersInLastYears"] = \
                np.sum(topicItem["PapersCount"][startYearIndex: endYearIndex + 1])

            if topicItem["PapersTotal"] > 0:
                topicItem["PerInLastYears"] = \
                    round(100 * topicItem["PapersInLastYears"] / topicItem["PapersTotal"], 1)

        # Scale in percentage per year
        if args.pYear:
            for topicItem in topicResults:
                for year, value in yearPapers.items():
                    index = topicItem["year"].index(year)
                    if value != 0:
                        topicItem["PapersCount"][index] /= (float(value) /
                                                            100.0)

        print("Calculating h-index...")
        # Calculate h index per topic
        for topicItem in topicResults:

            # print("\n" + topicName)

            # Sort papers by cited by count, descending, so that the paper at
            # rank i is the i-th most cited one (required by the h-index scan
            # below).
            papersIn = topicItem["papers"]
            papersIn = sorted(papersIn,
                              key=lambda x: int(x["citedBy"]),
                              reverse=True)

            # h-index: the largest rank "count" such that the paper at that
            # rank has at least "count" citations.
            count = 1
            hIndex = 0
            for paper in papersIn:
                # print(str(count) + ". " + paper["citedBy"])
                if int(paper["citedBy"]) >= count:
                    hIndex = count
                count += 1
                # print("hIndex: " + str(hIndex))
                # NOTE(review): this assignment runs on every iteration; the
                # final stored value is still correct, but it could be moved
                # after the loop.
                topicItem["hIndex"] = hIndex

        # Sort by PapersTotal, and then by name.
        # Two stable sorts: name ascending first, then total descending, so
        # topics with equal totals end up in alphabetical order.
        topicResults = sorted(topicResults,
                              key=lambda x: x["name"],
                              reverse=False)
        topicResults = sorted(topicResults,
                              key=lambda x: int(x["PapersTotal"]),
                              reverse=True)

        # If trend analysis, sort by agr, and get the first ones
        # (agr = Average Growth Rate; keep only the requested window of
        # args.length topics, skipping the first args.skipFirst).
        if args.trend:
            topicResults = sorted(topicResults,
                                  key=lambda x: int(x["agr"]),
                                  reverse=True)
            topicResults = topicResults[args.skipFirst:(args.skipFirst +
                                                        args.length)]

        # Print top topics as a fixed-width console table.
        print("\nTop topics:")
        print(
            "Average Growth Rate (AGR) and Average Documents per Year (ADY) period: %d - %d\n\r"
            % (yearArray[startYearIndex], yearArray[endYearIndex]))
        print('-' * 87)
        # PDLY presumably stands for "Percentage of Documents in Last Years"
        # (printed from "PerInLastYears" below) — TODO confirm upstream.
        print("{:<4s}{:<30s}{:>10s}{:>10s}{:>10s}{:>10s}{:>12s}".format(
            "Pos", args.criterion, "Total", "AGR", "ADY", "PDLY", "h-index"))
        print('-' * 87)
        count = 0
        for topicItem in topicResults:
            # NOTE(review): the h-index column uses width 10 here while the
            # header reserves 12, so that column is slightly misaligned.
            print("{:<4d}{:<30s}{:>10d}{:>10.1f}{:>10.1f}{:>10.1f}{:>10d}".
                  format(count + 1, topicItem["name"],
                         topicItem["PapersTotal"], topicItem["agr"],
                         topicItem["AverageDocPerYear"],
                         topicItem["PerInLastYears"], topicItem["hIndex"]))
            count += 1
        print('-' * 87)
        print("")

        # When a sub-topic filter was applied, strip each name back to its
        # main topic (text before the first comma) for plotting/saving.
        if filterSubTopic != "":
            for topicItem in topicResults:
                topicItem["name"] = topicItem["name"].split(",")[0].strip()

        # If more than 100 results and not wordCloud, no plot.
        # (Word clouds can handle many items; line/bar graphs cannot.)
        if len(
                topicResults
        ) > 100 and not args.graphType == "word_cloud" and not args.noPlot:
            args.noPlot = True
            print("\nERROR: Not allowed to graph more than 100 results")

        # Plot: dispatch on args.graphType. Each branch draws onto the
        # module-level matplotlib figure; saving/showing happens below.
        if not args.noPlot:
            if args.graphType == "evolution":
                graphUtils.plot_evolution(plt, topicResults,
                                          yearArray[startYearIndex],
                                          yearArray[endYearIndex], args)

            if args.graphType == "word_cloud":
                # Imported lazily so the wordcloud package is only required
                # when this graph type is actually requested.
                from wordcloud import WordCloud
                my_dpi = 96
                plt.figure(figsize=(1960 / my_dpi, 1080 / my_dpi), dpi=my_dpi)

                # Optional image mask constrains the cloud's shape.
                if args.wordCloudMask:
                    imageMask = np.array(Image.open(args.wordCloudMask))
                    wc = WordCloud(background_color="white",
                                   max_words=5000,
                                   width=1960,
                                   height=1080,
                                   colormap="tab10",
                                   mask=imageMask)
                else:
                    wc = WordCloud(background_color="white",
                                   max_words=5000,
                                   width=1960,
                                   height=1080,
                                   colormap="tab10")

                # Word size is driven by each topic's total paper count.
                freq = {}
                for topicItem in topicResults:
                    freq[topicItem["name"]] = topicItem["PapersTotal"]
                # generate word cloud
                wc.generate_from_frequencies(freq)

                # show
                plt.imshow(wc, interpolation="bilinear")
                plt.axis("off")

            if args.graphType == "bar":
                graphUtils.plot_bar_horizontal(plt, topicResults, args)

            if args.graphType == "bar_trends":
                graphUtils.plot_bar_horizontal_trends(
                    plt, topicResults, yearArray[startYearIndex],
                    yearArray[endYearIndex], args)
            if args.graphType == "time_line":
                graphUtils.plot_time_line(plt, topicResults, False)
                fig = plt.gcf()
                fig.set_size_inches(args.plotWidth, args.plotHeight)

                # Optional logarithmic y axis for the time-line graph.
                if args.yLog:
                    plt.yscale('log')
                    # TODO: Fix mticker
                    # plt.gca().yaxis.set_minor_formatter(mticker.ScalarFormatter())

                if args.pYear:
                    plt.ylabel("% of documents per year")

            # Title handling: reserve the top 5% of the figure for the
            # suptitle, otherwise use the full canvas.
            if args.graphTitle:
                # plt.title(args.graphTitle)
                fig = plt.gcf()
                fig.suptitle(args.graphTitle, y=1.0)
                plt.tight_layout(rect=[0, 0, 1, 0.95])
            else:
                plt.tight_layout()

            # Either show interactively (non-blocking from the GUI so the
            # GUI thread is not frozen) or save to the graphs output folder.
            if args.savePlot == "":
                if self.fromGui:
                    plt.show(block=False)
                else:
                    plt.show(block=True)
            else:
                plt.savefig(os.path.join(globalVar.GRAPHS_OUT_FOLDER,
                                         args.savePlot),
                            bbox_inches='tight',
                            pad_inches=0.01)
                print("Plot saved on: " +
                      os.path.join(globalVar.GRAPHS_OUT_FOLDER, args.savePlot))

        # Persist the tabulated results (top and extended CSVs); the file
        # names are kept on self for later retrieval by the caller/GUI.
        self.resultsFileName = paperSave.saveTopResults(
            topicResults, args.criterion)
        self.extResultsFileName = paperSave.saveExtendedResults(
            topicResults, args.criterion)

        # Only save results if that is result of a not previous result
        if not args.previousResults:
            paperSave.saveResults(
                papersDictOut,
                os.path.join(globalVar.RESULTS_FOLDER,
                             globalVar.OUTPUT_FILE_NAME))
        # From the GUI with no save target, show again (the earlier show was
        # non-blocking) — presumably to keep the window alive; confirm intent.
        if args.savePlot == "":
            if self.fromGui:
                plt.show()

        # Free the working copy of the papers list before returning.
        del papersDictInside