Пример #1
0
 def __init__(self, api_url):
     """
         Store the service URL and create the reader for its responses.

         Args:
             api_url: URL of the web service. A "/" is appended when the
                      value is non-empty and does not already end in one.
     """
     self.api_url = api_url
     # Empty URLs are left untouched; only a non-empty URL without a
     # trailing slash gets one added.
     no_slash_needed = len(api_url) == 0 or api_url[-1] == "/"
     if not no_slash_needed:
         self.api_url += "/"
     self.reader = ParsCitReader()
Пример #2
0
 def __init__(self, api_url):
     """
         Keep the service URL on the instance and build a ParsCitReader.

         Args:
             api_url: URL of the web service; when it is non-empty and its
                      last character is not "/", a "/" is appended.
     """
     self.api_url = api_url
     if len(api_url) > 0:
         last_char = api_url[-1]
         if last_char != "/":
             self.api_url += "/"
     self.reader = ParsCitReader()
Пример #3
0
class ParsCitClient:
    """
        Connects to ParsCit web service, sends documents and converts the
        response to metadata.
    """
    def __init__(self, api_url):
        """
            Store the service base URL and create the response reader.

            Args:
                api_url: base URL of the ParsCit web service. A trailing "/"
                         is appended when the URL is non-empty and lacks
                         one, so endpoint names can be concatenated to it.
        """
        if len(api_url) > 0 and not api_url.endswith("/"):
            api_url += "/"
        self.api_url = api_url
        self.reader = ParsCitReader()

    def tagFullDocument(self, doc_text):
        """
            Placeholder: tagging a full document is not implemented.

            Raises:
                NotImplementedError: always.
        """
        raise NotImplementedError

    @staticmethod
    def _wrapReferenceText(ref_list, format):
        """
            Join the references and wrap them in the marker text that is
            sent to ParsCit for the given format.
        """
        # Tuples are joined too: a tuple handed straight to the "%" operator
        # would be taken as the formatting arguments instead of as the text.
        if isinstance(ref_list, (list, tuple)):
            ref_list = "\n\n".join(ref_list)

        # Hack so ParsCit will actually recognize the references
        if format == "raw":
            return u"References\n\n%s" % (ref_list,)
        return u"<references>%s</references>" % (ref_list,)

    def extractReferenceList(self, ref_list, format="raw", timeout=None):
        """
            Main function: interface with ParsCit.

            Args:
                ref_list: string, or list/tuple of strings, to be processed.
                          Each string can contain multiple references;
                          sequences are joined with blank lines.
                format: "raw" prefixes the text with a "References" heading;
                        any other value wraps it in a <references> element.
                        The value is also sent to the service as-is.
                timeout: forwarded to requests.post. The default (None)
                         applies no timeout, as before.

            Returns:
                The result of the reader's parseParsCitXML() applied to the
                "parsed_xml" field of the service's JSON response.

            Raises:
                ValueError: if the service answers with a status other
                            than 200.
        """
        data = {
            "text": self._wrapReferenceText(ref_list, format),
            "format": format,
        }
        url = self.api_url + "extract_citations/"

        r = requests.post(url, json=data, timeout=timeout)

        if r.status_code != 200:
            # TODO specialized exceptions
            raise ValueError("ParsCit exception: HTTP %s from %s"
                             % (r.status_code, url))

        json_data = json.loads(r.content)
        return self.reader.parseParsCitXML(json_data["parsed_xml"])
Пример #4
0
class ParsCitClient:
    """
        Connects to ParsCit web service, sends documents and converts the
        response to metadata.
    """
    def __init__(self, api_url):
        """
            Store the service base URL and create the response reader.

            Args:
                api_url: base URL of the ParsCit web service. A trailing "/"
                         is appended when the URL is non-empty and lacks
                         one, so endpoint names can be concatenated to it.
        """
        if len(api_url) > 0 and not api_url.endswith("/"):
            api_url += "/"
        self.api_url = api_url
        self.reader = ParsCitReader()

    def tagFullDocument(self, doc_text):
        """
            Placeholder: tagging a full document is not implemented.

            Raises:
                NotImplementedError: always.
        """
        raise NotImplementedError

    @staticmethod
    def _wrapReferenceText(ref_list, format):
        """
            Join the references and wrap them in the marker text that is
            sent to ParsCit for the given format.
        """
        # Tuples are joined too: a tuple handed straight to the "%" operator
        # would be taken as the formatting arguments instead of as the text.
        if isinstance(ref_list, (list, tuple)):
            ref_list = "\n\n".join(ref_list)

        # Hack so ParsCit will actually recognize the references
        if format == "raw":
            return u"References\n\n%s" % (ref_list,)
        return u"<references>%s</references>" % (ref_list,)

    def extractReferenceList(self, ref_list, format="raw", timeout=None):
        """
            Main function: interface with ParsCit.

            Args:
                ref_list: string, or list/tuple of strings, to be processed.
                          Each string can contain multiple references;
                          sequences are joined with blank lines.
                format: "raw" prefixes the text with a "References" heading;
                        any other value wraps it in a <references> element.
                        The value is also sent to the service as-is.
                timeout: forwarded to requests.post. The default (None)
                         applies no timeout, as before.

            Returns:
                The result of the reader's parseParsCitXML() applied to the
                "parsed_xml" field of the service's JSON response.

            Raises:
                Exception: if the service answers with a status other
                           than 200.
        """
        data = {
            "text": self._wrapReferenceText(ref_list, format),
            "format": format,
        }
        url = self.api_url + "extract_citations/"

        r = requests.post(url, json=data, timeout=timeout)

        if r.status_code != 200:
            # TODO specialized exceptions
            # The exception type is kept as plain Exception for existing
            # callers; the message now says what actually went wrong.
            raise Exception("ParsCit exception: HTTP %s from %s"
                            % (r.status_code, url))

        json_data = json.loads(r.content)
        return self.reader.parseParsCitXML(json_data["parsed_xml"])