Exemplos de urlize em Python, exemplos de papers.utils.urlize em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: orcid.py Projeto: Lysxia/dissemin

 def homepage(self):
     """
     Return a homepage URL for this researcher, or None.

     The first researcher URL whose name contains 'home' or 'personal'
     (compared in lower case) is preferred; when no entry is labelled
     that way, the first listed URL is used instead.
     """
     entries = jpath('orcid-profile/orcid-bio/researcher-urls/researcher-url', self.json, default=[])
     for entry in entries:
         address = jpath('url/value', entry)
         label = jpath('url-name/value', entry)
         if label is None:
             continue
         lowered = label.lower()
         if 'home' in lowered or 'personal' in lowered:
             return urlize(address)
     if not entries:
         return None
     # No entry is labelled as a homepage: fall back to the first URL,
     # mapping any falsy result of urlize to None.
     return urlize(jpath('url/value', entries[0])) or None

Exemplo n.º 2

0

Exibir arquivo

Arquivo: orcid.py Projeto: tarsbase/dissemin

 def homepage(self):
     """
     Pick a homepage URL among the researcher URLs of this profile.

     An entry whose 'url-name' contains 'home' or 'personal' (compared
     in lower case) wins; otherwise the first entry is used. Returns
     None when the profile lists no researcher URL.
     """
     candidates = jpath('person/researcher-urls/researcher-url',
                        self.json,
                        default=[])
     for candidate in candidates:
         address = jpath('url/value', candidate)
         label = jpath('url-name', candidate)
         if label is not None:
             lowered = label.lower()
             if any(word in lowered for word in ('home', 'personal')):
                 return urlize(address)
     if not candidates:
         return None
     # Nothing looked like a homepage: use the first URL, turning any
     # falsy result of urlize into None.
     return urlize(jpath('url/value', candidates[0])) or None

Exemplo n.º 3

0

Exibir arquivo

 def test_urlize(self):
     """Check urlize on a bare domain, on None and on a full URL."""
     # A bare domain is expected to get an 'http://' scheme.
     self.assertEqual(urlize('gnu.org'), 'http://gnu.org')
     # None is expected to come back as None; assertIsNone reports the
     # actual value on failure, unlike assertTrue(... is None).
     self.assertIsNone(urlize(None))
     # A URL that already carries a scheme is expected to be unchanged.
     self.assertEqual(urlize(u'https://gnu.org'), 'https://gnu.org')

Exemplo n.º 4

0

Exibir arquivo

Arquivo: orcid.py Projeto: tarsbase/dissemin

    def search_by_name(first, last, instance=settings.ORCID_BASE_DOMAIN):
        """
        Search the public ORCID API for profiles matching a (first, last) name.

        This is a generator: it yields one dict per compatible profile,
        with the keys 'first', 'last', 'orcid', 'homepage' and 'keywords'.
        Nothing is yielded when `last` is empty.

        :param first: given name to search for
        :param last: family name to search for
        :param instance: ORCID domain appended to "https://pub." to build
            the base URL of the API

        XML syntax errors and `requests` errors are printed and end the
        iteration; they are not propagated to the caller.
        """
        # Validate arguments
        if not last:
            return
        # Perform query
        base_base_pub = "https://pub." + instance + "/"
        baseurl = base_base_pub + 'v1.2/search/orcid-bio/'
        dct = {
            'rows': 10,
            'start': 0,
            'q': 'family-name:%s given-names:%s' % (last, first),
        }
        url = baseurl + '?' + urlencode(dct)
        try:
            # NOTE(review): no timeout is given, so requests may wait
            # indefinitely on an unresponsive server.
            r = requests.get(url)
            # the namespace is the same for both the production and the
            # sandbox versions.
            ns = {'ns': 'http://www.orcid.org/ns/orcid'}
            xml = etree.fromstring(r.text.encode('utf-8'))
            for elem in xml.xpath('//ns:orcid-search-result', namespaces=ns):
                candidateFirst = None
                candidateLast = None
                # Get name
                pers_details = elem.xpath('.//ns:personal-details',
                                          namespaces=ns)
                if not pers_details:
                    continue
                for item in pers_details[0]:
                    if item.tag.endswith('given-names'):
                        candidateFirst = item.text
                    elif item.tag.endswith('family-name'):
                        candidateLast = item.text
                # Skip results that lack either part of the name
                if not candidateFirst or not candidateLast:
                    continue
                # Check that the names are compatible
                if shallower_name_similarity(
                        (first, last), (candidateFirst, candidateLast)) == 0:
                    continue

                # Get ORCID iD
                orcid_elem = elem.xpath(
                    './ns:orcid-profile/ns:orcid-identifier/ns:path',
                    namespaces=ns)
                if not orcid_elem:
                    continue
                orcid = orcid_elem[0].text

                # Add other things: only the first researcher URL is kept
                url_texts = elem.xpath(
                    './ns:orcid-profile/ns:orcid-bio/ns:researcher-urls/ns:researcher-url/ns:url/text()',
                    namespaces=ns)
                homepage = urlize(url_texts[0]) if url_texts else None

                keywords = elem.xpath(
                    './ns:orcid-profile/ns:orcid-bio/ns:keywords/ns:keyword/text()',
                    namespaces=ns)

                yield {
                    'first': candidateFirst,
                    'last': candidateLast,
                    'orcid': orcid,
                    'homepage': homepage,
                    'keywords': keywords,
                }

        # print(e) with a single argument behaves the same on Python 2
        # and Python 3, unlike the Python 2-only `print e` statement.
        except etree.XMLSyntaxError as e:
            print(e)
        except requests.exceptions.RequestException as e:
            print(e)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: orcid.py Projeto: Lysxia/dissemin

    def search_by_name(first, last):
        """
        Search the public ORCID API for profiles matching a (first, last) name.

        This is a generator: it yields one dict per compatible profile,
        with the keys 'first', 'last', 'orcid', 'homepage' and 'keywords'.
        Nothing is yielded when `last` is empty.

        :param first: given name to search for
        :param last: family name to search for

        XML syntax errors and `requests` errors are printed and end the
        iteration; they are not propagated to the caller.
        """
        # Validate arguments
        if not last:
            return
        # Perform query
        baseurl = 'http://pub.orcid.org/v1.2/search/orcid-bio/'
        dct = {
            'rows': 10,
            'start': 0,
            'q': 'family-name:%s given-names:%s' % (last, first),
        }
        url = baseurl + '?' + urlencode(dct)
        try:
            # NOTE(review): no timeout is given, so requests may wait
            # indefinitely on an unresponsive server.
            r = requests.get(url)
            ns = {'ns': 'http://www.orcid.org/ns/orcid'}
            xml = etree.fromstring(r.text.encode('utf-8'))
            for elem in xml.xpath('//ns:orcid-search-result', namespaces=ns):
                candidateFirst = None
                candidateLast = None
                # Get name
                pers_details = elem.xpath('.//ns:personal-details', namespaces=ns)
                if not pers_details:
                    continue
                for item in pers_details[0]:
                    if item.tag.endswith('given-names'):
                        candidateFirst = item.text
                    elif item.tag.endswith('family-name'):
                        candidateLast = item.text
                # Skip results that lack either part of the name
                if not candidateFirst or not candidateLast:
                    continue
                # Check that the names are compatible
                if shallower_name_similarity((first, last), (candidateFirst, candidateLast)) == 0:
                    continue

                # Get ORCID iD
                orcid_elem = elem.xpath('./ns:orcid-profile/ns:orcid-identifier/ns:path', namespaces=ns)
                if not orcid_elem:
                    continue
                orcid = orcid_elem[0].text

                # Add other things: only the first researcher URL is kept
                url_texts = elem.xpath('./ns:orcid-profile/ns:orcid-bio/ns:researcher-urls/ns:researcher-url/ns:url/text()', namespaces=ns)
                homepage = urlize(url_texts[0]) if url_texts else None

                keywords = elem.xpath('./ns:orcid-profile/ns:orcid-bio/ns:keywords/ns:keyword/text()', namespaces=ns)

                yield {
                    'first': candidateFirst,
                    'last': candidateLast,
                    'orcid': orcid,
                    'homepage': homepage,
                    'keywords': keywords,
                }

        # print(e) with a single argument behaves the same on Python 2
        # and Python 3, unlike the Python 2-only `print e` statement.
        except etree.XMLSyntaxError as e:
            print(e)
        except requests.exceptions.RequestException as e:
            print(e)