示例#1
0
    def geocode_url(self, url, attempted=None):
        """Resolve ``url`` to a ``(name, (latitude, longitude))`` tuple.

        The page at ``url`` is fetched, the RDF document it links to is
        downloaded and parsed, and the attributes of the described thing are
        scanned for a value that ``util.parse_geo`` turns into a complete
        coordinate pair. When no attribute yields one, the thing's relations
        are followed recursively until one of them produces a name together
        with both coordinates.

        :param url: address of the page to geocode.
        :param attempted: set of URLs already visited during this lookup.
            It is shared with (and mutated by) the recursive calls so that
            cyclic relationships terminate; callers normally omit it.
        :returns: ``(name, (latitude, longitude))``. Either coordinate may be
            ``None`` when nothing could be resolved.
        """
        if attempted is None:
            attempted = set()
        # Record this page up front so that relations pointing back at it,
        # directly or through other pages, are skipped further down.
        attempted.add(url)

        util.logger.debug("Fetching %s...", url)
        page = self._call_geocoder(url)
        soup = BeautifulSoup(page)

        rdf_url = self.parse_rdf_link(soup)
        util.logger.debug("Fetching %s...", rdf_url)
        page = self.urlopen(rdf_url)

        things, thing = self.parse_rdf(page)  # TODO
        name = self.get_label(thing)

        # Start from "unknown" so that an empty attribute list cannot leave
        # the coordinates unbound.
        latitude = longitude = None
        for _, value in self.get_attributes(thing):
            latitude, longitude = util.parse_geo(value)
            if None not in (latitude, longitude):
                break

        if None in (latitude, longitude):
            for _, resource in self.get_relations(thing):
                # Use the entry ``things`` holds for the resource when there
                # is one, otherwise treat the resource itself as the URL.
                related_url = things.get(resource, resource)  # pylint: disable=E1103
                if related_url in attempted:  # Avoid cyclic relationships.
                    continue
                name, (latitude, longitude) = self.geocode_url(
                    related_url, attempted)
                if None not in (name, latitude, longitude):
                    break

        return (name, (latitude, longitude))
示例#2
0
 def parse_rdf_link(page, mime_type='application/rdf+xml'):
     """Return the URL of the RDF link found in the <head> of ``page``.

     The link looked for is a ``<link rel="alternate">`` element whose
     ``type`` equals ``mime_type``.

     :param page: markup handed to ``BeautifulSoup`` for parsing.
     :param mime_type: value the link's ``type`` attribute must have.
     :returns: the link's ``href``, or ``None`` when the document has no
         <head>, no matching link, or the link has no (or an empty) ``href``.
     """
     soup = BeautifulSoup(page)
     head = soup.head  # pylint: disable=E1101,E1103
     if head is None:
         # No <head> element, hence nothing to search in.
         return None
     link = head.find('link', rel='alternate', type=mime_type)
     if link is None:
         return None
     # A missing or empty ``href`` is reported the same way as a missing link.
     return link.get('href') or None
示例#3
0
    def parse_xhtml(self, page):
        """Read the place name and position from the ``geo.*`` meta tags.

        The name comes from ``<meta name="geo.placename">`` and the
        coordinates from ``<meta name="geo.position">``, both looked up in
        the document's <head>.

        :param page: markup to parse, or an existing ``BeautifulSoup``
            object, which is used as is.
        :returns: ``(name, (latitude, longitude))``. ``name`` is ``None``
            when the tag is absent or its content is empty; both coordinates
            are ``None`` when the position tag is absent, has no content, or
            either parsed coordinate equals zero.
        """
        soup = page if isinstance(page, BeautifulSoup) else BeautifulSoup(page)

        head = soup.head
        if head is None:
            # Without a <head> there are no meta tags to inspect.
            return (None, (None, None))

        name_meta = head.find('meta', {'name': 'geo.placename'})
        name = (name_meta.get('content') or None) if name_meta else None

        latitude = longitude = None
        position_meta = head.find('meta', {'name': 'geo.position'})
        position = position_meta.get('content') if position_meta else None
        if position is not None:
            # NOTE(review): ``parse_geo`` is not defined in this block;
            # confirm that it is in scope at module level (original TODO).
            latitude, longitude = parse_geo(position)
            # NOTE(review): a zero in either coordinate discards the whole
            # position, which also rejects real points on the equator or the
            # prime meridian -- confirm this is intended.
            if latitude == 0 or longitude == 0:
                latitude = longitude = None

        return (name, (latitude, longitude))