Python SpringerReferenceParser примеры использования

Язык программирования: Python

Пространство имен/Пакет: ReferenceParser

Примеров на hotexamples.com: 4

Python SpringerReferenceParser — найдено 4 примера. Это лучшие примеры Python-кода для ReferenceParser.SpringerReferenceParser, полученные из проектов с открытым исходным кодом. Вы можете поставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

SpringerReferenceParser(2)

citeParse(2)

Пример #1

Показать файл

Файл: academicThings.py Проект: AnkaiJie/Google-Scholar-Citation-Fraud-Data-Collector

    def findAllCitations(self):
        """Parse this paper's reference section into ``Citation`` objects.

        The reference content is pulled from the paper's PDF object with a
        ``PaperReferenceExtractor`` and parsed with the parser that matches
        the ``'Publisher'`` entry of ``self.getInfo()``.

        Returns:
            The list returned by the parser's ``citeParse``, with every
            element replaced in place by ``Citation(element)``.

        Raises:
            Exception: if the publisher is neither ``'Springer US'`` nor
                ``'IEEE'``.
        """
        raw_references = PaperReferenceExtractor().getReferencesContent(self.__pdfObj)

        # The publisher is checked only after extraction, as before, so an
        # unknown publisher is reported after the references were read.
        if self.getInfo()['Publisher'] == 'Springer US':
            reference_parser = SpringerReferenceParser()
        elif self.getInfo()['Publisher'] == 'IEEE':
            reference_parser = IeeeReferenceParser()
        else:
            raise Exception('Publisher not recognized; no citation parser for this format')

        parsed = reference_parser.citeParse(raw_references)

        # Slice assignment keeps the same list object the parser returned.
        parsed[:] = [Citation(entry) for entry in parsed]
        return parsed

Пример #2

Показать файл

    def findAllCitations(self):
        """Return the paper's references as a list of ``Citation`` objects.

        Steps: extract the reference content from the PDF object, parse it
        with the Springer or IEEE parser depending on the ``'Publisher'``
        value from ``self.getInfo()``, then wrap each parsed entry in
        ``Citation``.

        Raises:
            Exception: when the publisher is not ``'Springer US'`` or
                ``'IEEE'``.
        """
        extractor = PaperReferenceExtractor()
        references_text = extractor.getReferencesContent(self.__pdfObj)

        # Dispatch on the publisher; each branch parses with its own parser.
        if self.getInfo()['Publisher'] == 'Springer US':
            citations = SpringerReferenceParser().citeParse(references_text)
        elif self.getInfo()['Publisher'] == 'IEEE':
            citations = IeeeReferenceParser().citeParse(references_text)
        else:
            message = 'Publisher not recognized; no citation parser for this format'
            raise Exception(message)

        # Overwrite each raw entry with its Citation wrapper, in place.
        for position, raw_citation in enumerate(citations):
            citations[position] = Citation(raw_citation)

        return citations

Пример #3

Показать файл

Файл: scrapper.py Проект: AnkaiJie/Google-Scholar-Citation-Fraud-Data-Collector

def count_cross_cites(author, x_most_rel, top_x, y_most_rel):
    """Gather citations from an author's papers and tally them per cited author.

    Stage 1 loads the author's papers, extracts each paper's reference
    content and parses it with the parser matching the paper's publisher
    ('IEEE' or 'Springer US'). Stage 2 counts, for every cited author, how
    many citations name them and in which cited titles. The result, sorted
    by descending frequency, is passed to ``count_cross_cites_stage3``.

    Args:
        author: object providing ``loadPapers`` and ``getPapers``.
        x_most_rel: passed to ``author.loadPapers``; it is then replaced by
            the number of papers actually loaded before being forwarded.
        top_x: used in the progress output and forwarded to stage 3.
        y_most_rel: forwarded to stage 3 unchanged.
    """
    author.loadPapers(x_most_rel, pubFilter=True, delay=True)
    paper_list = author.getPapers()
    x_most_rel = len(paper_list)
    print("Total number of valid GSC papers: " + str(len(paper_list)))
    citation_list = []

    springer_bot = SpringerReferenceParser()
    ieee_bot = IeeeReferenceParser()

    # gets all the citations from all the papers in the list
    print('STAGE 1 GETTING CITATIONS')
    print("-----------------------------------------------------------")
    for paper in paper_list:
        pub = paper.getInfo()['Publisher']
        pdf_paper = paper.getPdfObj()
        print('Paper title: ' + str(paper.getInfo()['Title']))
        if pdf_paper is None:
            print('paper object is none')
            continue

        extractor = PaperReferenceExtractor()
        ref_content = extractor.getReferencesContent(pdf_paper)

        if ref_content is None:
            continue
        try:
            if pub == 'IEEE':
                citations = ieee_bot.citeParse(ref_content)
            elif pub == 'Springer US':
                citations = springer_bot.citeParse(ref_content)
            else:
                print('Invalid publication format from: ' + pub)
                continue
        except Exception as e:
            print('An exception occured with parsing citations: ' + str(e))
            # Skip this paper: without this, `citations` would be unbound
            # (NameError) or still hold the previous paper's citations,
            # which would then be counted a second time.
            continue

        citation_list += citations
    print("STAGE 1 COMPLETE -----------------------------------------------------------")
    print('From the valid top ' + str(top_x) +' papers, all the citations found: ' + str(citation_list))

    author_dist = {}

    # Goes through each citation and records, per cited author, how often
    # they are cited and the titles of the cited works.
    # End result: {'author': {'freq': int, 'papers': [cited titles]}, ...}
    print('STAGE 2 AGGREGATING CITATION COUNTS BY AUTHOR ------------------------------------')

    for citation in citation_list:
        title = citation['title']
        for cited_author in citation['authors']:
            entry = author_dist.setdefault(cited_author, {'freq': 0, 'papers': []})
            entry['freq'] += 1
            if title not in entry['papers']:
                entry['papers'].append(title)

    # Sorts the dictionary - now a list of tuples ordered by descending
    # frequency: [('author', {'freq': 5, 'papers': []}), ...].
    # reversed(sorted(...)) is kept instead of reverse=True so that authors
    # with equal frequency stay in the same relative order as before.
    author_dist = list(reversed(sorted(author_dist.items(), key=lambda x: x[1]['freq'])))
    print('STAGE 2 COMPLETE -----------------------------------------------------------------')
    print('sorted author list in tuples:')
    print(author_dist)

    count_cross_cites_stage3(author, author_dist, x_most_rel, top_x, y_most_rel)

Пример #4

Показать файл

def count_cross_cites(author, x_most_rel, top_x, y_most_rel):
    """Collect and aggregate the citations found in an author's papers.

    Stage 1: load the author's papers, extract the reference content of each
    one and parse it with the IEEE or Springer parser according to the
    paper's ``'Publisher'`` info. Papers without a PDF object, without
    reference content, with an unsupported publisher, or whose parsing
    fails are skipped.

    Stage 2: build a per-cited-author frequency table, sort it by descending
    frequency and hand it to ``count_cross_cites_stage3``.

    Args:
        author: object providing ``loadPapers`` and ``getPapers``.
        x_most_rel: passed to ``author.loadPapers``; afterwards overwritten
            with the number of papers loaded and forwarded to stage 3.
        top_x: shown in the progress output and forwarded to stage 3.
        y_most_rel: forwarded to stage 3 unchanged.
    """
    author.loadPapers(x_most_rel, pubFilter=True, delay=True)
    paper_list = author.getPapers()
    x_most_rel = len(paper_list)
    print("Total number of valid GSC papers: " + str(len(paper_list)))
    citation_list = []

    springer_bot = SpringerReferenceParser()
    ieee_bot = IeeeReferenceParser()

    # gets all the citations from all the papers in the list
    print('STAGE 1 GETTING CITATIONS')
    print("-----------------------------------------------------------")
    for paper in paper_list:
        pub = paper.getInfo()['Publisher']
        pdf_paper = paper.getPdfObj()
        print('Paper title: ' + str(paper.getInfo()['Title']))
        if pdf_paper is None:
            print('paper object is none')
            continue

        extractor = PaperReferenceExtractor()
        ref_content = extractor.getReferencesContent(pdf_paper)

        if ref_content is None:
            continue
        try:
            if pub == 'IEEE':
                citations = ieee_bot.citeParse(ref_content)
            elif pub == 'Springer US':
                citations = springer_bot.citeParse(ref_content)
            else:
                print('Invalid publication format from: ' + pub)
                continue
        except Exception as e:
            print('An exception occured with parsing citations: ' + str(e))
            # A failed parse yields no citations for this paper. Falling
            # through would either hit an unbound `citations` or re-add the
            # previous paper's citations, so move on to the next paper.
            continue

        citation_list += citations
    print(
        "STAGE 1 COMPLETE -----------------------------------------------------------"
    )
    print('From the valid top ' + str(top_x) +
          ' papers, all the citations found: ' + str(citation_list))

    author_dist = {}

    # Goes through each citation and records, per cited author, how often
    # they are cited and the titles of the cited works.
    # End result: {'author': {'freq': int, 'papers': [cited titles]}, ...}
    print(
        'STAGE 2 AGGREGATING CITATION COUNTS BY AUTHOR ------------------------------------'
    )

    for citation in citation_list:
        title = citation['title']
        for cited_author in citation['authors']:
            stats = author_dist.setdefault(cited_author,
                                           {'freq': 0, 'papers': []})
            stats['freq'] += 1
            if title not in stats['papers']:
                stats['papers'].append(title)

    # Sorts the dictionary - now a list of tuples ordered by descending
    # frequency: [('author', {'freq': 5, 'papers': []}), ...].
    # reversed(sorted(...)) is kept instead of reverse=True so that authors
    # with equal frequency stay in the same relative order as before.
    author_dist = list(
        reversed(sorted(author_dist.items(), key=lambda x: x[1]['freq'])))
    print(
        'STAGE 2 COMPLETE -----------------------------------------------------------------'
    )
    print('sorted author list in tuples:')
    print(author_dist)

    count_cross_cites_stage3(author, author_dist, x_most_rel, top_x,
                             y_most_rel)