Пример #1
0
    def extract_paper_info(self):
        # DOI
        doi = self.soup.teiheader.find("idno", type="DOI")
        if doi:
            self.paper.doi = elem_to_text(doi)
        elif self.document:
            self.paper.doi = self.document['doi']
        # Title
        title = self.soup.teiheader.find("title")
        if title:
            self.paper.title = elem_to_text(title)
        # Authors
        authors = self.get_authors(self.soup.analytic.find_all('author'))
        if authors:
            self.paper.authors = authors
        if self.soup.abstract:
            self.paper.abstract = elem_to_text(self.soup.abstract)
        # Year
        published = self.soup.analytic.find("publicationstmt")
        if published:
            self.paper.year = elem_to_text(published.find("date", type="when"))
        # Organization / Affiliations
        affiliations = self.soup.analytic.find_all('affiliation')
        for affiliation in affiliations:
            org = Organization()
            org.type = "institution"
            org.name = elem_to_text(affiliation.find("orgname", type="institution"))
            address = Address()
            addr = affiliation.find("address")
            if addr:
                address.place = elem_to_text(addr.find("settlement"))
                address.region = elem_to_text(addr.find("region"))
                address.country = elem_to_text(addr.find("country"))
            org.address = address
            self.paper.affiliations.append(org)
        # University Ranking
        if self.paper.affiliations:
            if self.paper.affiliations[0] != '':
                self.paper.uni_rank = self.uni_rank.get_rank(self.paper.affiliations[0].name)
            elif len(self.paper.affiliations) > 1:
                self.paper.uni_rank = self.uni_rank.get_rank(self.paper.affiliations[1].name)
        else:
            self.paper.uni_rank = self.uni_rank.get_rank('Random')
        # Citations
        bibliography = self.soup.listbibl.find_all('biblstruct')
        for bibl in bibliography:
            citation = Citation()
            cited_paper = bibl.analytic
            if cited_paper:
                citation.title = elem_to_text(cited_paper.find("title", type="main"))
                citation_authors = self.get_authors(cited_paper.find_all("author"))
                citation.doi = elem_to_text(cited_paper.find("idno", type="DOI"))
                if citation_authors:
                    citation.authors = citation_authors
            cited_journal = bibl.monogr
            if cited_journal:
                citation.source = elem_to_text(cited_journal.find("title"))
                try:
                    citation.publish_year = cited_journal.imprint.date['when']
                except TypeError:
                    pass
            self.paper.citations.append(citation)
        # NER - Ack pairs - Funding status
        self.paper.ack_pairs = self.get_funding_status()
        er_list = [org for (entity, org) in self.paper.ack_pairs]
        if 'ORG' in er_list:
            self.paper.funded = 1
        else:
            self.paper.funded = 0
        # SJR
        api_resp = self.get_sjr(self.paper.doi, self.paper.title,self.db)

        # Adding the paragraphs from the paper to the corpus
        extractor = ClaimEvidenceExtractor(self.xml, self.soup,self.test_csv) 
        os.chdir("/scifact/")
        extractor.make_corpus()

        # Get response for claim evidence using request to API
        response =  requests.get('http://0.0.0.0:8000/getclaimevidence')
        print(response)

        self.support, self.refute, self.ratio = extractor.get_results()
        print('support:',self.support,'refute:',self.refute,'ratio:',self.ratio)

        os.chdir("../")

        if api_resp:
            self.paper.cited_by_count = api_resp["num_citations"]
            self.paper.sjr = api_resp["sjr"]
            self.paper.subject = api_resp["subject"]
            self.paper.subject_code = api_resp["subject_code"]
            self.paper.normalized = api_resp["normalized_citations"]
            self.paper.velocity = api_resp["citationVelocity"]
            self.paper.influentialcitations = api_resp["influentialCitationCount"]
            self.paper.references = api_resp["references_count"]
            self.paper.flag = api_resp["openaccessflag"]
            self.paper.influentialref = api_resp["influentialReferencesCount"]
            self.paper.ref_background = api_resp["reference_background"]
            self.paper.ref_result = api_resp["reference_result"]
            self.paper.ref_method = api_resp["reference_methodology"]
            self.paper.cite_background = api_resp["citations_background"]
            self.paper.cite_result = api_resp["citations_result"]
            self.paper.cite_method = api_resp["citations_methodology"]
            self.paper.cite_next = api_resp["citations_next"]
            self.paper.influential_references_methodology = api_resp["upstream_influential_methodology_count"]
            self.paper.issn = api_resp["ISSN"]
            self.paper.auth = api_resp["authors"]
            self.paper.age = api_resp["age"]
            if api_resp["abstract"]:
                self.paper.abstract = api_resp["abstract"]
        # Set self-citations
        self.paper.self_citations = self.paper.set_self_citations()
        # return paper
        #calculate coCitations
        t2,t3 = coCite(self.paper.doi, self.db)
        
        #calculate NLP features
        reading_score = self.get_reading_score(self.paper.abstract)
        subjectivity = self.get_subjectivity(self.paper.abstract)
        sentiment = self.get_sentiment(self.paper.abstract)
        
        return {"doi": self.paper.doi, "title": self.paper.title, "num_citations": self.paper.cited_by_count,
                "author_count": len(self.paper.authors),"sjr": self.paper.sjr, "u_rank": self.paper.uni_rank,
                "funded": self.paper.funded,"self_citations": self.paper.self_citations, "subject": self.paper.subject,
                "subject_code": self.paper.subject_code, "citationVelocity": self.paper.velocity,
                "influentialCitationCount": self.paper.influentialcitations, "references_count": self.paper.references,
                "openaccessflag": self.paper.flag, "influentialReferencesCount": self.paper.influentialref,
                "normalized_citations": self.paper.normalized, "reference_background": self.paper.ref_background,
                "reference_result": self.paper.ref_result, "reference_methodology": self.paper.ref_method,
                "citations_background": self.paper.cite_background, "citations_result": self.paper.cite_result,
                "citations_methodology": self.paper.cite_method, "citations_next": self.paper.cite_next,
                "upstream_influential_methodology_count": self.paper.influential_references_methodology,
                "coCite2":t2, "coCite3":t3, "ISSN":self.paper.issn, "authors":self.paper.auth,"citations":api_resp["citations"],"age":self.paper.age,
                "reading_score":reading_score, "subjectivity":subjectivity, "sentiment":sentiment, "supporting_sentences":self.support, "refuting_sentences":self.refute, "ratio_support":self.ratio}
Пример #2
0
    def extract_paper_info(self):
        # DOI
        doi = self.soup.teiheader.find("idno", type="DOI")
        if doi:
            self.paper.doi = elem_to_text(doi)
        elif self.document:
            self.paper.doi = self.document['doi']
        # Title
        title = self.soup.teiheader.find("title")
        if title:
            self.paper.title = elem_to_text(title)
        # Authors
        authors = self.get_authors(self.soup.analytic.find_all('author'))
        if authors:
            self.paper.authors = authors
        # Year
        published = self.soup.analytic.find("publicationstmt")
        if published:
            self.paper.year = elem_to_text(published.find("date", type="when"))
        # Organization / Affiliations
        affiliations = self.soup.analytic.find_all('affiliation')
        for affiliation in affiliations:
            org = Organization()
            org.type = "institution"
            org.name = elem_to_text(
                affiliation.find("orgname", type="institution"))
            address = Address()
            addr = affiliation.find("address")
            if addr:
                address.place = elem_to_text(addr.find("settlement"))
                address.region = elem_to_text(addr.find("region"))
                address.country = elem_to_text(addr.find("country"))
            org.address = address
            self.paper.affiliations.append(org)
        # University Ranking
        if self.paper.affiliations:
            if self.paper.affiliations[0] != '':
                self.paper.uni_rank = self.uni_rank.get_rank(
                    self.paper.affiliations[0].name)
            elif len(self.paper.affiliations) > 1:
                self.paper.uni_rank = self.uni_rank.get_rank(
                    self.paper.affiliations[1].name)
        else:
            self.paper.uni_rank = self.uni_rank.get_rank('Random')
        # Citations
        bibliography = self.soup.listbibl.find_all('biblstruct')
        for bibl in bibliography:
            citation = Citation()
            cited_paper = bibl.analytic
            if cited_paper:
                citation.title = elem_to_text(
                    cited_paper.find("title", type="main"))
                citation_authors = self.get_authors(
                    cited_paper.find_all("author"))
                citation.doi = elem_to_text(
                    cited_paper.find("idno", type="DOI"))
                if citation_authors:
                    citation.authors = citation_authors
            cited_journal = bibl.monogr
            if cited_journal:
                citation.source = elem_to_text(cited_journal.find("title"))
                try:
                    citation.publish_year = cited_journal.imprint.date['when']
                except TypeError:
                    pass
            self.paper.citations.append(citation)
        # NER - Ack pairs - Funding status
        self.paper.ack_pairs = self.get_funding_status()
        er_list = [org for (entity, org) in self.paper.ack_pairs]
        if 'ORG' in er_list:
            self.paper.funded = 1
        else:
            self.paper.funded = 0
        # SJR
        api_resp = self.get_sjr(self.paper.doi, self.paper.title)
        if api_resp:
            self.paper.cited_by_count = api_resp["num_citations"]
            self.paper.sjr = api_resp["sjr"]
            self.paper.subject = api_resp["subject"]
            self.paper.subject_code = api_resp["subject_code"]
            self.paper.normalized = api_resp["normalized_citations"]
            self.paper.velocity = api_resp["citationVelocity"]
            self.paper.influentialcitations = api_resp[
                "influentialCitationCount"]
            self.paper.references = api_resp["references_count"]
            self.paper.flag = api_resp["openaccessflag"]
            self.paper.influentialref = api_resp["influentialReferencesCount"]
            self.paper.ref_background = api_resp["reference_background"]
            self.paper.ref_result = api_resp["reference_result"]
            self.paper.ref_method = api_resp["reference_methodology"]
            self.paper.cite_background = api_resp["citations_background"]
            self.paper.cite_result = api_resp["citations_result"]
            self.paper.cite_method = api_resp["citations_methodology"]
            self.paper.cite_next = api_resp["citations_next"]
        # Set self-citations
        self.paper.self_citations = self.paper.set_self_citations()
        # Set influential_methodology_references
        self.paper.influential_references_methodology = self.set_influential_references_methodology(
        )
        # return paper

        t2, t3 = coCite(self.paper.doi)
        return {
            "doi": self.paper.doi,
            "title": self.paper.title,
            "num_citations": self.paper.cited_by_count,
            "author_count": len(self.paper.authors),
            "sjr": self.paper.sjr,
            "u_rank": self.paper.uni_rank,
            "funded": self.paper.funded,
            "self_citations": self.paper.self_citations,
            "subject": self.paper.subject,
            "subject_code": self.paper.subject_code,
            "citationVelocity": self.paper.velocity,
            "influentialCitationCount": self.paper.influentialcitations,
            "references_count": self.paper.references,
            "openaccessflag": self.paper.flag,
            "influentialReferencesCount": self.paper.influentialref,
            "normalized_citations": self.paper.normalized,
            "reference_background": self.paper.ref_background,
            "reference_result": self.paper.ref_result,
            "reference_methodology": self.paper.ref_method,
            "citations_background": self.paper.cite_background,
            "citations_result": self.paper.cite_result,
            "citations_methodology": self.paper.cite_method,
            "citations_next": self.paper.cite_next,
            "upstream_influential_methodology_count":
            self.paper.influential_references_methodology,
            "coCite2": t2,
            "coCite3": t3
        }