示例#1
0
 def _load_from_csv(self, reader, entity_type, source):
     j = 0
     for i, line in enumerate(reader):
         postcode_abbrev, (easting, northing) = line[0], line[10:12]
         if postcode_abbrev[-4] != ' ':
             postcode = '%s %s' % (postcode_abbrev[:-3], postcode_abbrev[-3:])
         else:
             postcode = postcode_abbrev
         postcode_abbrev = postcode_abbrev.replace(' ', '')
             
         try:
             easting, northing = int(easting), int(northing)
         except ValueError:
             continue
             
         j += 1
         
         try:
             entity = Entity.objects.get(source=source, _identifiers__scheme='postcode', _identifiers__value=postcode_abbrev)
         except Entity.DoesNotExist:
             entity = Entity(source=source)
         
         entity.location = Point(easting, northing, srid=27700)
         entity.geometry = entity.location
         entity.primary_type = entity_type
         
         identifiers = {
             'postcode': postcode_abbrev,
             'postcode-canonical': postcode,
         }
         entity.save(identifiers=identifiers)
         set_name_in_language(entity, 'en',
                              title = postcode)
         entity.all_types.add(entity_type)
         entity.update_all_types_completion()
示例#2
0
 def _get_entity_types(self):
     
     entity_types = {}
     new_entity_types = set()
     for slug, et in self._entity_types.items():
         et_category, created = EntityTypeCategory.objects.get_or_create(name=et['category'])
         try:
             entity_type = EntityType.objects.get(slug=slug)
             created = False
         except EntityType.DoesNotExist:
             entity_type = EntityType(slug=slug)
             created = True
         entity_type.category = et_category
         entity_type.slug = slug
         if created:
             entity_type.show_in_nearby_list = et['show_in_nearby_list']
             entity_type.show_in_category_list = et['show_in_category_list']
         entity_type.save()
         for lang_code, lang_name in settings.LANGUAGES:
             with override(lang_code):
                 set_name_in_language(entity_type, lang_code,
                                      verbose_name=_(et['verbose_name']),
                                      verbose_name_singular=_(et['verbose_name_singular']),
                                      verbose_name_plural=_(et['verbose_name_plural']))
         new_entity_types.add(slug)
         entity_types[slug] = entity_type
     
     for slug in new_entity_types:
         subtype_of = self._entity_types[slug]['parent-types']
         entity_types[slug].subtype_of.clear()
         for s in subtype_of:
             entity_types[slug].subtype_of.add(entity_types[s])
         entity_types[slug].save()
     
     return entity_types
示例#3
0
 def _get_entity_types(self):
     
     entity_types = {}
     new_entity_types = set()
     for slug, et in self._entity_types.items():
         et_category, created = EntityTypeCategory.objects.get_or_create(name=et['category'])
         try:
             entity_type = EntityType.objects.get(slug=slug)
             created = False
         except EntityType.DoesNotExist:
             entity_type = EntityType(slug=slug)
             created = True
         entity_type.category = et_category
         entity_type.slug = slug
         if created:
             entity_type.show_in_nearby_list = et['show_in_nearby_list']
             entity_type.show_in_category_list = et['show_in_category_list']
         entity_type.save()
         for lang_code, lang_name in settings.LANGUAGES:
             with override(lang_code):
                 set_name_in_language(entity_type, lang_code,
                                      verbose_name=_(et['verbose_name']),
                                      verbose_name_singular=_(et['verbose_name_singular']),
                                      verbose_name_plural=_(et['verbose_name_plural']))
         new_entity_types.add(slug)
         entity_types[slug] = entity_type
     
     for slug in new_entity_types:
         subtype_of = self._entity_types[slug]['parent-types']
         entity_types[slug].subtype_of.clear()
         for s in subtype_of:
             entity_types[slug].subtype_of.add(entity_types[s])
         entity_types[slug].save()
     
     return entity_types
示例#4
0
 def _get_entity(self, stop_code, stop_name, source, entity_type):
     """Find the bus stop entity for ``stop_code`` or create one.

     ``get_entity('naptan', stop_code)`` is tried first and its result is
     returned unchanged.  If that call raises, the entity is looked up by
     its NaPTAN identifier: a missing entity is created, and if several
     match they are all deleted and replaced by a fresh one.  On this
     fallback path the primary type, source, identifier, English title and
     type list of the entity are written before it is returned.

     :param stop_code: value of the 'naptan' identifier
     :param stop_name: English title given to the entity
     :param source: source assigned to the entity
     :param entity_type: primary (and only) type assigned to the entity
     :returns: the entity that was found or created
     """
     scheme = 'naptan'
     try:
         entity = get_entity(scheme, stop_code)
     except Exception:
         # ``except Exception`` rather than a bare ``except`` so that
         # KeyboardInterrupt and SystemExit are not swallowed here.
         try:
             entity = Entity.objects.get(_identifiers__scheme=scheme,
                                         _identifiers__value=stop_code)
             logger.debug("Found Entity: %s" % entity)
         except Entity.DoesNotExist:
             logger.debug("Entity does not exist: %s-%s" %
                          (stop_code, stop_name))
             entity = Entity()
         except Entity.MultipleObjectsReturned:
             logger.warning("Multiple Entities found for : %s-%s" %
                            (stop_code, stop_name))
             # Duplicates are dropped and a single clean entity rebuilt.
             Entity.objects.filter(_identifiers__scheme=scheme,
                                   _identifiers__value=stop_code).delete()
             entity = Entity()
         entity.primary_type = entity_type
         entity.source = source
         identifiers = {scheme: stop_code}
         # Save before attaching the title and the type list: a freshly
         # constructed Entity has no primary key yet, and related rows
         # (presumably what the name and all_types are stored as) cannot
         # be attached to an unsaved instance.  This is the order the
         # other importers use.
         entity.save(identifiers=identifiers)
         set_name_in_language(entity, 'en', title=stop_name)
         entity.all_types = (entity_type, )
     return entity
示例#5
0
    def import_data(self, metadata, output):
        """Synchronise this source's entities with the TPEG feed.

        The document at ``self._tpeg_url`` is parsed and every
        ``tpeg_message`` that carries at least one WGS84 coordinate is saved
        as an entity identified by its ``message_id`` under the 'bbc-tpeg'
        scheme.  Entities of this source that were not saved during the run
        are deleted afterwards.

        :param metadata: not used by this method
        :param output: not used by this method
        """
        source, entity_type = self._get_source(), self._get_entity_type()

        parser = etree.XMLParser(load_dtd=True)
        parser.resolvers.add(BBCTPEGResolver())
        # Release the URL handle once the document has been parsed instead
        # of leaving it open.
        response = urllib.urlopen(self._tpeg_url)
        try:
            xml = etree.parse(response, parser=parser)
        finally:
            response.close()

        # Existing entities of this source, keyed by their TPEG message id.
        entities, seen = {}, set()
        for entity in Entity.objects.filter(source=source):
            if 'bbc-tpeg' in entity.identifiers:
                entities[entity.identifiers['bbc-tpeg']] = entity

        for message in xml.getroot().findall('tpeg_message'):
            road_traffic_message = message.find('road_traffic_message')
            # Named message_id so the builtin ``id`` is not shadowed.
            message_id = road_traffic_message.attrib['message_id']

            try:
                entity = entities[message_id]
            except KeyError:
                entity = Entity()
                entities[message_id] = entity

            entity.source = source
            entity.primary_type = entity_type

            # Built as a list so len() and indexing below do not depend on
            # map() returning a list.
            locs = [
                self._wgs84_to_point(wgs84)
                for wgs84 in road_traffic_message.findall(
                    'location_container/location_coordinates/WGS84')
            ]
            if len(locs) > 1:
                entity.geometry = LineString(*locs)
            elif len(locs) == 1:
                entity.geometry = locs[0]
            else:
                # Without coordinates the message is not stored; it is not
                # added to ``seen`` either, so a previously stored entity
                # for it is removed by the clean-up loop below.
                continue
            # The location is the mean of all coordinates.
            entity.location = Point(
                sum(p.x for p in locs) / len(locs),
                sum(p.y for p in locs) / len(locs),
                srid=4326,
            )

            entity.metadata['bbc_tpeg'] = {
                'xml': etree.tostring(message),
                'severity': road_traffic_message.attrib['severity_factor'],
                'generated':
                road_traffic_message.attrib['message_generation_time'],
                'version': int(road_traffic_message.attrib['version_number']),
            }

            entity.save(identifiers={'bbc-tpeg': message_id})
            set_name_in_language(entity,
                                 'en',
                                 title=message.find('summary').text)
            entity.all_types = [entity_type]
            entity.update_all_types_completion()
            seen.add(entity.pk)

        # Remove entities that were not saved during this run.
        for entity in Entity.objects.filter(source=source):
            if entity.pk not in seen:
                entity.delete()
示例#6
0
 def _get_entity(self, stop_code, stop_name, source, entity_type):
     """Find the bus stop entity for ``stop_code`` or create one.

     The result of ``get_entity('naptan', stop_code)`` is returned as-is
     when that call succeeds.  When it raises, the entity is fetched by
     its NaPTAN identifier instead; if none exists a new one is created,
     and if several exist they are all deleted and a new one is created.
     The entity from this fallback path then has its primary type, source,
     identifier, English title and type list written.

     :param stop_code: value of the 'naptan' identifier
     :param stop_name: English title given to the entity
     :param source: source assigned to the entity
     :param entity_type: primary (and only) type assigned to the entity
     :returns: the entity that was found or created
     """
     scheme = 'naptan'
     try:
         entity = get_entity(scheme, stop_code)
     except Exception:
         # Narrowed from a bare ``except`` so KeyboardInterrupt and
         # SystemExit propagate instead of triggering the fallback.
         try:
             entity = Entity.objects.get(_identifiers__scheme=scheme,
                     _identifiers__value=stop_code)
             logger.debug("Found Entity: %s" % entity)
         except Entity.DoesNotExist:
             logger.debug("Entity does not exist: %s-%s" % (stop_code, stop_name))
             entity = Entity()
         except Entity.MultipleObjectsReturned:
             logger.warning("Multiple Entities found for : %s-%s" % (stop_code, stop_name))
             # Clean up the duplicates and start again from a blank entity.
             Entity.objects.filter(_identifiers__scheme=scheme,
                     _identifiers__value=stop_code).delete()
             entity = Entity()
         entity.primary_type = entity_type
         entity.source = source
         identifiers = {scheme: stop_code}
         # The entity is saved first because a new Entity has no primary
         # key until then, and the title and all_types are presumably kept
         # in related rows that need one.  The other importers save before
         # naming as well.
         entity.save(identifiers=identifiers)
         set_name_in_language(entity, 'en', title=stop_name)
         entity.all_types = (entity_type,)
     return entity
示例#7
0
 def _scrape(self, route, url, output):
     """Rebuild the stop list of ``route`` from the HTML page at ``url``.

     The page is fetched with '&showall=1' appended, the route's existing
     stops are cleared, and one StopOnRoute is created per row (after the
     first) of the first table found in the page, in row order.

     :param route: route whose ``stops`` are replaced
     :param url: page address; '&showall=1' is appended before fetching
     :param output: not used here (the route is written to
         ``self._output`` instead)
     """
     self._output.write(route)
     url += '&showall=1'
     service = etree.parse(urlopen(url), parser = etree.HTMLParser())
     route.stops.clear()
     # The first row is skipped ([1:]); presumably it is a header row.
     for i, tr in enumerate(service.find('.//table').findall('tr')[1:]):
         
         try:
             stop_code = tr[1][0].text
         except IndexError:
             
             # Stops on ACIS Live that don't have codes, e.g., out of county
             # stops
             # These are identified by their name under the 'acisroute'
             # scheme and always belong to this provider's own source.
             stop_name = tr[3][0].text
             try:
                 entity = Entity.objects.get(source=self._get_source(),
                                             _identifiers__scheme='acisroute',
                                             _identifiers__value=stop_name)
             except Entity.DoesNotExist:
                 entity = Entity(source=self._get_source())
             
             entity_type = self._get_entity_type()
             entity.primary_type = entity_type
             identifiers = { 'acisroute': stop_name }
             entity.save(identifiers=identifiers)
             set_name_in_language(entity, 'en', title=stop_name)
             entity.all_types = (entity_type,)
             entity.update_all_types_completion()
         
         else:
             # TODO: Change identifier lookup based on ACIS region
             try:
                 entity = get_entity('naptan', stop_code)
                 if entity.source == self._get_source():
                     # Raise Http404 if this is a bus stop we came up with,
                     # so any name changes, etc, get processed
                     raise Http404()
                 # An entity owned by another source is used unchanged.
             except Http404:
                 # Out of zone bus stops with NaPTAN codes - alternatively,
                 # the fake bus stops Oxontime made up for the TUBE route
                 try:
                     entity = Entity.objects.get(source=self._get_source(),
                                                 _identifiers__scheme='naptan',
                                                 _identifiers__value=stop_code)
                 except Entity.DoesNotExist:
                     entity = Entity(source=self._get_source())
                 identifiers = { 'naptan': stop_code }
                 entity_type = self._get_entity_type()
                 entity.primary_type = entity_type
                 entity.save(identifiers=identifiers)
                 set_name_in_language(entity, 'en', title=tr[3][0].text)
                 entity.all_types = (entity_type,)
                 entity.update_all_types_completion()
                 entity.save()
             
         # ``order`` is the row's position among the data rows, from 0.
         StopOnRoute.objects.create(route=route, entity=entity, order=i)
示例#8
0
 def import_data(self, metadata, output):
     """Synchronise this source's entities with the TPEG feed.

     Parses the document at ``self._tpeg_url`` and saves one entity per
     ``tpeg_message`` that has at least one WGS84 coordinate, identified
     by the message's ``message_id`` under the 'bbc-tpeg' scheme.  Any
     entity of this source that was not saved during the run is deleted
     at the end.

     :param metadata: not used by this method
     :param output: not used by this method
     """
     source, entity_type = self._get_source(), self._get_entity_type()

     parser = etree.XMLParser(load_dtd=True)
     parser.resolvers.add(BBCTPEGResolver())
     # Close the URL handle after parsing rather than leaking it.
     response = urllib.urlopen(self._tpeg_url)
     try:
         xml = etree.parse(response, parser=parser)
     finally:
         response.close()

     # Existing entities of this source, keyed by their TPEG message id.
     entities, seen = {}, set()
     for entity in Entity.objects.filter(source=source):
         if 'bbc-tpeg' in entity.identifiers:
             entities[entity.identifiers['bbc-tpeg']] = entity

     for message in xml.getroot().findall('tpeg_message'):
         road_traffic_message = message.find('road_traffic_message')
         # Named message_id so the builtin ``id`` is not shadowed.
         message_id = road_traffic_message.attrib['message_id']

         try:
             entity = entities[message_id]
         except KeyError:
             entity = Entity()
             entities[message_id] = entity

         entity.source = source
         entity.primary_type = entity_type

         # A list, so len() and indexing do not rely on map() returning one.
         locs = [
             self._wgs84_to_point(wgs84)
             for wgs84 in road_traffic_message.findall('location_container/location_coordinates/WGS84')
         ]
         if len(locs) > 1:
             entity.geometry = LineString(*locs)
         elif len(locs) == 1:
             entity.geometry = locs[0]
         else:
             # Messages without coordinates are not stored and not marked
             # as seen, so an older entity for them is deleted below.
             continue
         # The location is the mean of all coordinates.
         entity.location = Point(
             sum(p.x for p in locs)/len(locs),
             sum(p.y for p in locs)/len(locs),
             srid=4326,
         )

         entity.metadata['bbc_tpeg'] = {
             'xml': etree.tostring(message),
             'severity': road_traffic_message.attrib['severity_factor'],
             'generated': road_traffic_message.attrib['message_generation_time'],
             'version': int(road_traffic_message.attrib['version_number']),
         }

         entity.save(identifiers={'bbc-tpeg': message_id})
         set_name_in_language(entity, 'en',
                              title=message.find('summary').text)
         entity.all_types = [entity_type]
         entity.update_all_types_completion()
         seen.add(entity.pk)

     # Remove entities that were not saved during this run.
     for entity in Entity.objects.filter(source=source):
         if entity.pk not in seen:
             entity.delete()
示例#9
0
    def _load_from_csv(self, reader, entity_type, source):
        j = 0
        for i, line in enumerate(reader):
            postcode_abbrev, (easting, northing) = line[0], line[10:12]
            postcode_abbrev = postcode_abbrev.replace(' ', '')

            # Now try to figure out where to put the space in
            if re.match(r'[A-Z][0-9]{2}[A-Z]{2}', postcode_abbrev):
                # A9 9AA
                postcode = '%s %s' % (postcode_abbrev[:2], postcode_abbrev[2:])
            elif re.match(r'[A-Z][0-9]{3}[A-Z]{2}', postcode_abbrev):
                # A99 9AA
                postcode = '%s %s' % (postcode_abbrev[:3], postcode_abbrev[3:])
            elif re.match(r'[A-Z]{2}[0-9]{2}[A-Z]{2}', postcode_abbrev):
                # AA9 9AA
                postcode = '%s %s' % (postcode_abbrev[:3], postcode_abbrev[3:])
            elif re.match(r'[A-Z]{2}[0-9]{3}[A-Z]{2}', postcode_abbrev):
                # AA99 9AA
                postcode = '%s %s' % (postcode_abbrev[:4], postcode_abbrev[4:])
            elif re.match(r'[A-Z][0-9][A-Z][0-9][A-Z]{2}', postcode_abbrev):
                # A9A 9AA
                postcode = '%s %s' % (postcode_abbrev[:3], postcode_abbrev[3:])
            elif re.match(r'[A-Z]{2}[0-9][A-Z][0-9][A-Z]{2}', postcode_abbrev):
                # AA9A 9AA
                postcode = '%s %s' % (postcode_abbrev[:4], postcode_abbrev[4:])
            else:
                postcode = postcode_abbrev

            try:
                easting, northing = int(easting), int(northing)
            except ValueError:
                continue

            j += 1

            try:
                entity = Entity.objects.get(
                    source=source,
                    _identifiers__scheme='postcode',
                    _identifiers__value=postcode_abbrev)
            except Entity.DoesNotExist:
                entity = Entity(source=source)

            entity.location = Point(easting, northing, srid=27700)
            entity.geometry = entity.location
            entity.primary_type = entity_type

            identifiers = {
                'postcode': postcode_abbrev,
                'postcode-canonical': postcode,
            }
            entity.save(identifiers=identifiers)
            set_name_in_language(entity, 'en', title=postcode)
            entity.all_types.add(entity_type)
            entity.update_all_types_completion()
示例#10
0
 def _load_from_csv(self, reader, entity_type, source):
     j = 0
     for i, line in enumerate(reader):
         postcode_abbrev, (easting, northing) = line[0], line[10:12]
         postcode_abbrev = postcode_abbrev.replace(' ', '')
         
         # Now try to figure out where to put the space in
         if re.match(r'[A-Z][0-9]{2}[A-Z]{2}', postcode_abbrev):
             # A9 9AA
             postcode = '%s %s' % (postcode_abbrev[:2], postcode_abbrev[2:])
         elif re.match(r'[A-Z][0-9]{3}[A-Z]{2}', postcode_abbrev):
             # A99 9AA
             postcode = '%s %s' % (postcode_abbrev[:3], postcode_abbrev[3:])
         elif re.match(r'[A-Z]{2}[0-9]{2}[A-Z]{2}', postcode_abbrev):
             # AA9 9AA
             postcode = '%s %s' % (postcode_abbrev[:3], postcode_abbrev[3:])
         elif re.match(r'[A-Z]{2}[0-9]{3}[A-Z]{2}', postcode_abbrev):
             # AA99 9AA
             postcode = '%s %s' % (postcode_abbrev[:4], postcode_abbrev[4:])
         elif re.match(r'[A-Z][0-9][A-Z][0-9][A-Z]{2}', postcode_abbrev):
             # A9A 9AA
             postcode = '%s %s' % (postcode_abbrev[:3], postcode_abbrev[3:])
         elif re.match(r'[A-Z]{2}[0-9][A-Z][0-9][A-Z]{2}', postcode_abbrev):
             # AA9A 9AA
             postcode = '%s %s' % (postcode_abbrev[:4], postcode_abbrev[4:])
         else:
             postcode = postcode_abbrev
         
         try:
             easting, northing = int(easting), int(northing)
         except ValueError:
             continue
             
         j += 1
         
         try:
             entity = Entity.objects.get(source=source, _identifiers__scheme='postcode', _identifiers__value=postcode_abbrev)
         except Entity.DoesNotExist:
             entity = Entity(source=source)
         
         entity.location = Point(easting, northing, srid=27700)
         entity.geometry = entity.location
         entity.primary_type = entity_type
         
         identifiers = {
             'postcode': postcode_abbrev,
             'postcode-canonical': postcode,
         }
         entity.save(identifiers=identifiers)
         set_name_in_language(entity, 'en',
                              title = postcode)
         entity.all_types.add(entity_type)
         entity.update_all_types_completion()
示例#11
0
    def endElement(self, name):
        """Handle the end of the XML element called ``name``.

        * 'StopPoint': looks up the entity type for the collected metadata
          and, if one is configured, adds the stop via ``self.add_stop``.
        * 'StopAreaRef': moves the collected stop-area reference into
          ``self.stop_areas``.
        * 'StopArea': fetches or creates the EntityGroup for the area code
          and sets its names, unless ``self.areas`` is set and no entry of
          it prefixes the area code.
        * 'CommonName' / 'Name': records the first name seen for the
          current language in ``self.names``.
        """
        self.name_stack.pop()

        if name == 'StopPoint':
            try:
                # Classify metro stops according to their particular system
                if self.meta['stop-type'] == 'MET':
                    try:
                        entity_type, is_entrance = self.entity_types[
                            self.meta['stop-type'] + ':' +
                            self.meta['atco-code'][6:8]]
                    except KeyError:
                        # No system-specific entry: use the generic one.
                        entity_type, is_entrance = self.entity_types['MET']
                else:
                    entity_type, is_entrance = self.entity_types[
                        self.meta['stop-type']]
            except KeyError:
                # Stop types without a configured entity type are ignored.
                pass
            else:
                entity = self.add_stop(self.meta, entity_type, self.source,
                                       is_entrance)
                if entity:
                    self.entities.add(entity)

        elif name == 'StopAreaRef':
            self.stop_areas.append(self.meta['stop-area'])
            del self.meta['stop-area']

        elif name == 'StopArea':
            # With an area filter configured, skip codes outside of it.
            if self.areas is not None:
                if not any(self.meta['area-code'].startswith(area)
                           for area in self.areas):
                    return

            sa, created = EntityGroup.objects.get_or_create(
                source=self.source, ref_code=self.meta['area-code'])
            sa.save()
            # ``title`` instead of ``name`` so the parameter is not shadowed.
            for lang_code, title in self.names.items():
                # Names recorded without a language are stored as English.
                if lang_code is None:
                    lang_code = 'en'
                set_name_in_language(sa, lang_code, title=title)

        elif name == 'CommonName':
            # Only the first name per language is kept.
            if self.lang not in self.names:
                self.names[self.lang] = self.meta['common-name']

        elif name == 'Name' and self.meta['name'] != '':
            if self.lang not in self.names:
                self.names[self.lang] = self.meta['name']
示例#12
0
    def endElement(self, name):
        """Handle the end of the XML element called ``name``.

        * 'StopPoint': looks up the entity type for the collected metadata
          and, if one is configured, adds the stop via ``self.add_stop``.
        * 'StopAreaRef': moves the collected stop-area reference into
          ``self.stop_areas``.
        * 'StopArea': fetches or creates the EntityGroup for the area code
          and sets its names, unless ``self.areas`` is set and no entry of
          it prefixes the area code.
        * 'CommonName' / 'Name': records the first name seen for the
          current language in ``self.names``.
        """
        self.name_stack.pop()

        if name == 'StopPoint':
            try:
                # Classify metro stops according to their particular system
                if self.meta['stop-type'] == 'MET':
                    try:
                        entity_type, is_entrance = self.entity_types[self.meta['stop-type'] + ':' + self.meta['atco-code'][6:8]]
                    except KeyError:
                        # No system-specific entry: use the generic one.
                        entity_type, is_entrance = self.entity_types['MET']
                else:
                    entity_type, is_entrance = self.entity_types[self.meta['stop-type']]
            except KeyError:
                # Stop types without a configured entity type are ignored.
                pass
            else:
                entity = self.add_stop(self.meta, entity_type, self.source, is_entrance)
                if entity:
                    self.entities.add(entity)

        elif name == 'StopAreaRef':
            self.stop_areas.append(self.meta['stop-area'])
            del self.meta['stop-area']

        elif name == 'StopArea':
            # With an area filter configured, skip codes outside of it.
            if self.areas is not None:
                if not any(self.meta['area-code'].startswith(area)
                           for area in self.areas):
                    return

            sa, created = EntityGroup.objects.get_or_create(
                source=self.source,
                ref_code=self.meta['area-code'])
            sa.save()
            # ``title`` instead of ``name`` so the parameter is not shadowed.
            for lang_code, title in self.names.items():
                # Names recorded without a language are stored as English.
                if lang_code is None:
                    lang_code = 'en'
                set_name_in_language(sa, lang_code, title=title)

        elif name == 'CommonName':
            # Only the first name per language is kept.
            if self.lang not in self.names:
                self.names[self.lang] = self.meta['common-name']

        elif name == 'Name' and self.meta['name'] != '':
            if self.lang not in self.names:
                self.names[self.lang] = self.meta['name']
示例#13
0
 def _get_entity_type(self):
     """Return the 'post-code' entity type, creating it when missing.

     The type is fetched or created within the 'Uncategorised' category.
     Both list-visibility flags are switched off only when the type was
     just created, and ``set_name_in_language`` is called for every
     language in ``settings.LANGUAGES``.
     """
     uncategorised = EntityTypeCategory.objects.get_or_create(
         name=ugettext_noop('Uncategorised'))[0]
     postcode_type, is_new = EntityType.objects.get_or_create(
         slug='post-code', category=uncategorised)
     postcode_type.slug = 'post-code'
     if is_new:
         postcode_type.show_in_nearby_list = False
         postcode_type.show_in_category_list = False
     postcode_type.save()

     for lang_code, language_label in settings.LANGUAGES:
         # The names are translated while that language is active.
         with override(lang_code):
             plain = _('postcode')
             singular = _('a postcode')
             plural = _('postcodes')
             set_name_in_language(postcode_type, lang_code,
                                  verbose_name=plain,
                                  verbose_name_singular=singular,
                                  verbose_name_plural=plural)
     return postcode_type
示例#14
0
    def _get_entity_types(self):
        """Fetch or create the entity types for all known stop types.

        One EntityType per entry of ``self.entity_type_definitions`` is
        saved in the 'Transport' category with its URI and per-language
        names.  Afterwards every type except 'public-transport-access-node'
        is made a subtype of the entry keyed by ``None``, and 'MET...'
        variants (other than the rail station type) additionally become
        subtypes of 'MET'.

        :returns: dict mapping stop type to its EntityType
        """
        entity_types = {}
        # NOTE(review): the category is looked up by a translated name, so
        # the lookup presumably depends on the active language - confirm.
        category, category_created = EntityTypeCategory.objects.get_or_create(
            name=_('Transport'))
        category.save()

        for stop_type in self.entity_type_definitions:
            et = self.entity_type_definitions[stop_type]

            # ``created`` has to describe this entity type.  It used to be
            # the flag returned for the category above, so the visibility
            # defaults were applied according to whether the *category*
            # was new.
            try:
                entity_type = EntityType.objects.get(slug=et['slug'])
                created = False
            except EntityType.DoesNotExist:
                entity_type = EntityType(slug=et['slug'])
                created = True

            entity_type.category = category
            entity_type.uri = "http://mollyproject.org/schema/maps#%s" % et[
                'uri-local']
            if created:
                entity_type.show_in_nearby_list = et['nearby']
                entity_type.show_in_category_list = et['category']
            entity_type.save()
            for lang_code, lang_name in settings.LANGUAGES:
                # Translate the names while that language is active.
                with override(lang_code):
                    set_name_in_language(entity_type,
                                         lang_code,
                                         verbose_name=ugettext(
                                             et['verbose-name']),
                                         verbose_name_singular=ugettext(
                                             et['verbose-name-singular']),
                                         verbose_name_plural=ugettext(
                                             et['verbose-name-plural']))

            entity_types[stop_type] = entity_type

        rail_slug = self.RAIL_STATION_DEFINITION['slug']
        for stop_type, entity_type in entity_types.items():
            if entity_type.slug == 'public-transport-access-node':
                continue
            # Everything else hangs off the entry keyed by None.
            entity_type.subtype_of.add(entity_types[None])
            is_metro_variant = (stop_type.startswith('MET')
                                and stop_type != 'MET')
            if is_metro_variant and entity_type.slug != rail_slug:
                entity_type.subtype_of.add(entity_types['MET'])

        return entity_types
示例#15
0
 def _get_entity_type(self):
     """Fetch the 'post-code' entity type, creating it if necessary.

     The lookup is made within the 'Uncategorised' category.  A newly
     created type has both list-visibility flags set to False; an existing
     one keeps its flags.  ``set_name_in_language`` is then called once per
     language in ``settings.LANGUAGES``.
     """
     category = EntityTypeCategory.objects.get_or_create(
         name=ugettext_noop('Uncategorised'))[0]
     entity_type, newly_made = EntityType.objects.get_or_create(
         slug='post-code', category=category)
     entity_type.slug = 'post-code'
     if newly_made:
         entity_type.show_in_nearby_list = False
         entity_type.show_in_category_list = False
     entity_type.save()

     for lang_code, language_label in settings.LANGUAGES:
         # Translations are resolved with that language activated.
         with override(lang_code):
             name = _('postcode')
             name_singular = _('a postcode')
             name_plural = _('postcodes')
             set_name_in_language(entity_type,
                                  lang_code,
                                  verbose_name=name,
                                  verbose_name_singular=name_singular,
                                  verbose_name_plural=name_plural)
     return entity_type
示例#16
0
 def decode_category(self, attrib):
     """Return the PodcastCategory for the current category name.

     ``self._category`` (or 'Uncategorised' when it is None) is slugified,
     the matching category is fetched or created, named in
     ``self.lang_code`` and given an order from ``self.CATEGORY_ORDERS``.
     A slug without an order is appended at the next free position.

     :param attrib: not used by this method
     :returns: the saved PodcastCategory
     """
     cat = 'Uncategorised' if self._category is None else self._category
     slug = slugify(cat)

     podcast_category = PodcastCategory.objects.get_or_create(slug=slug)[0]
     set_name_in_language(podcast_category, self.lang_code, name=cat)

     orders = self.CATEGORY_ORDERS
     try:
         position = orders[slug]
     except KeyError:
         # Unknown slugs are registered after all known ones.
         position = orders[slug] = len(orders)
     podcast_category.order = position

     podcast_category.save()
     return podcast_category
示例#17
0
 def _get_entity_type(self):
     """Return the 'travel-alert' entity type, creating it when missing.

     A type that had to be created starts with both list-visibility flags
     set to False.  The type is always (re)assigned to the 'Transport'
     category and saved, after which ``set_name_in_language`` is called
     for every language in ``settings.LANGUAGES``.
     """
     try:
         alert_type = EntityType.objects.get(slug='travel-alert')
     except EntityType.DoesNotExist:
         alert_type = EntityType(slug='travel-alert')
         # Defaults that only apply to a brand-new type.
         alert_type.show_in_nearby_list = False
         alert_type.show_in_category_list = False

     transport = EntityTypeCategory.objects.get_or_create(
         name=ugettext_noop('Transport'))[0]
     alert_type.category = transport
     alert_type.save()

     for lang_code, language_label in settings.LANGUAGES:
         # The names are translated while that language is active.
         with override(lang_code):
             plain = _('travel alert')
             singular = _('a travel alert')
             plural = _('travel alerts')
             set_name_in_language(alert_type, lang_code,
                                  verbose_name=plain,
                                  verbose_name_singular=singular,
                                  verbose_name_plural=plural)
     return alert_type
示例#18
0
    def decode_category(self, attrib):
        """Fetch or create the PodcastCategory for the current category.

        The category name is ``self._category``, falling back to
        'Uncategorised' when that is None.  The category is keyed by the
        slugified name, named in ``self.lang_code``, and ordered according
        to ``self.CATEGORY_ORDERS``; slugs missing from that mapping are
        added to it with the next free position.

        :param attrib: not used by this method
        :returns: the saved PodcastCategory
        """
        cat = self._category if self._category is not None else 'Uncategorised'
        slug = slugify(cat)

        podcast_category = PodcastCategory.objects.get_or_create(
            slug=slug)[0]
        set_name_in_language(podcast_category, self.lang_code, name=cat)

        known_orders = self.CATEGORY_ORDERS
        try:
            rank = known_orders[slug]
        except KeyError:
            # First time this slug is seen: place it after the others.
            rank = known_orders[slug] = len(known_orders)
        podcast_category.order = rank

        podcast_category.save()
        return podcast_category
示例#19
0
    def import_data(self, **metadata):
        """Import podcast categories and podcasts from the feed at ``self.url``.

        Every entry of the top-level feed becomes a PodcastCategory ordered
        by its position.  The feed linked from each entry is then read and
        each of its entries becomes a Podcast of this provider, which is
        handed to ``self.update_podcast.delay``.  Podcasts of this provider
        whose ``rss_url`` was not seen during the run are deleted.

        :param metadata: returned unchanged
        :returns: ``metadata``
        """
        atom = self.atom

        def parse_feed(feed_url):
            # Parse the document and always release the URL handle.
            handle = urllib.urlopen(feed_url)
            try:
                return etree.parse(handle)
            finally:
                handle.close()

        xml = parse_feed(self.url)

        # A set: it is only used for the membership test at the end.
        rss_urls = set()

        category_elems = xml.getroot().findall(atom('entry'))

        for i, category_elem in enumerate(category_elems):
            link = category_elem.find(atom('link') + "[@rel='alternate']")
            category_slug = link.attrib['href'].split('/')[-1]

            category = PodcastCategory.objects.get_or_create(
                slug=category_slug)[0]
            # NOTE(review): ``lang_code`` is not defined in this method;
            # presumably it exists at module scope - confirm, otherwise
            # this line raises NameError.
            set_name_in_language(category,
                                 lang_code,
                                 name=category_elem.find(atom('title')).text)
            category.order = i
            category.save()

            category_xml = parse_feed(link.attrib['href'])

            for podcast_elem in category_xml.getroot().findall(atom('entry')):
                url = podcast_elem.find(atom('link') +
                                        "[@rel='alternate']").attrib['href']
                podcast_slug = url.split('/')[-1]
                podcast = Podcast.objects.get_or_create(
                    provider=self.class_path, slug=podcast_slug)[0]
                # NOTE(review): rss_url and category are set but not saved
                # here; presumably update_podcast persists them - confirm.
                podcast.rss_url = url

                podcast.category = category

                rss_urls.add(url)

                self.update_podcast.delay(podcast)

        # Drop podcasts that no longer appear in any category feed.
        for podcast in Podcast.objects.filter(provider=self.class_path):
            if podcast.rss_url not in rss_urls:
                podcast.delete()

        return metadata
示例#20
0
 def _get_entity_type(self):
     """Fetch the 'travel-alert' entity type, creating it if necessary.

     Only a newly created type gets its list-visibility flags set (both
     to False).  In either case the type is put in the 'Transport'
     category and saved, and ``set_name_in_language`` is called once per
     language in ``settings.LANGUAGES``.
     """
     try:
         entity_type = EntityType.objects.get(slug='travel-alert')
     except EntityType.DoesNotExist:
         entity_type = EntityType(slug='travel-alert')
         # Initial visibility for a type that did not exist before.
         entity_type.show_in_nearby_list = False
         entity_type.show_in_category_list = False

     category = EntityTypeCategory.objects.get_or_create(
         name=ugettext_noop('Transport'))[0]
     entity_type.category = category
     entity_type.save()

     for lang_code, language_label in settings.LANGUAGES:
         # Translations are resolved with that language activated.
         with override(lang_code):
             name = _('travel alert')
             name_singular = _('a travel alert')
             name_plural = _('travel alerts')
             set_name_in_language(entity_type,
                                  lang_code,
                                  verbose_name=name,
                                  verbose_name_singular=name_singular,
                                  verbose_name_plural=name_plural)
     return entity_type
示例#21
0
    def _get_entity_types(self):
        """Fetch or create the entity types for all known stop types.

        One EntityType per entry of ``self.entity_type_definitions`` is
        saved in the 'Transport' category with its URI and per-language
        names.  Afterwards every type except 'public-transport-access-node'
        is made a subtype of the entry keyed by ``None``, and 'MET...'
        variants (other than the rail station type) additionally become
        subtypes of 'MET'.

        :returns: dict mapping stop type to its EntityType
        """
        entity_types = {}
        # NOTE(review): the category is looked up by a translated name, so
        # the lookup presumably depends on the active language - confirm.
        category, category_created = EntityTypeCategory.objects.get_or_create(name=_('Transport'))
        category.save()

        for stop_type in self.entity_type_definitions:
            et = self.entity_type_definitions[stop_type]

            # ``created`` has to describe this entity type.  It used to be
            # the flag returned for the category above, so the visibility
            # defaults were applied according to whether the *category*
            # was new.
            try:
                entity_type = EntityType.objects.get(slug=et['slug'])
                created = False
            except EntityType.DoesNotExist:
                entity_type = EntityType(slug=et['slug'])
                created = True

            entity_type.category = category
            entity_type.uri = "http://mollyproject.org/schema/maps#%s" % et['uri-local']
            if created:
                entity_type.show_in_nearby_list = et['nearby']
                entity_type.show_in_category_list = et['category']
            entity_type.save()
            for lang_code, lang_name in settings.LANGUAGES:
                # Translate the names while that language is active.
                with override(lang_code):
                    set_name_in_language(entity_type, lang_code,
                                         verbose_name=ugettext(et['verbose-name']),
                                         verbose_name_singular=ugettext(et['verbose-name-singular']),
                                         verbose_name_plural=ugettext(et['verbose-name-plural']))

            entity_types[stop_type] = entity_type

        rail_slug = self.RAIL_STATION_DEFINITION['slug']
        for stop_type, entity_type in entity_types.items():
            if entity_type.slug == 'public-transport-access-node':
                continue
            # Everything else hangs off the entry keyed by None.
            entity_type.subtype_of.add(entity_types[None])
            is_metro_variant = stop_type.startswith('MET') and stop_type != 'MET'
            if is_metro_variant and entity_type.slug != rail_slug:
                entity_type.subtype_of.add(entity_types['MET'])

        return entity_types
示例#22
0
    def import_data(self, **metadata):
        """
        Synchronise podcast categories and podcasts with the Atom feed at
        ``self.url``.

        Every ``entry`` of the top-level feed is a category: its slug is the
        last path segment of its rel='alternate' link, and its position in
        the feed becomes ``category.order``. The linked feed is then fetched
        and every ``entry`` in it is a podcast of this provider, which is
        handed to ``self.update_podcast.delay``. Finally, podcasts of this
        provider whose ``rss_url`` was not seen in any feed are deleted.

        Returns the ``metadata`` keyword arguments unchanged.
        """
        # Local import: only this method needs it.
        from contextlib import closing

        atom = self.atom
        # Close the HTTP response once parsed instead of leaking the handle.
        with closing(urllib.urlopen(self.url)) as feed:
            xml = etree.parse(feed)

        # A set gives constant-time membership tests in the clean-up loop.
        rss_urls = set()

        category_elems = xml.getroot().findall(atom('entry'))

        for i, category_elem in enumerate(category_elems):
            link = category_elem.find(atom('link')+"[@rel='alternate']")
            category_url = link.attrib['href']
            slug = category_url.split('/')[-1]

            category, created = PodcastCategory.objects.get_or_create(slug=slug)
            # NOTE(review): `lang_code` is not assigned anywhere in this
            # method; presumably it is a module-level name - confirm,
            # otherwise this line raises NameError.
            set_name_in_language(category, lang_code, name=category_elem.find(atom('title')).text)
            category.order = i
            category.save()

            with closing(urllib.urlopen(category_url)) as category_feed:
                category_xml = etree.parse(category_feed)

            for podcast_elem in category_xml.getroot().findall(atom('entry')):
                url = podcast_elem.find(atom('link')+"[@rel='alternate']").attrib['href']
                slug = url.split('/')[-1]
                podcast, created = Podcast.objects.get_or_create(
                    provider=self.class_path,
                    slug=slug)
                podcast.rss_url = url
                podcast.category = category

                rss_urls.add(url)

                # NOTE(review): the podcast is not saved here; presumably
                # update_podcast persists rss_url/category - confirm.
                self.update_podcast.delay(podcast)

        for podcast in Podcast.objects.filter(provider=self.class_path):
            if podcast.rss_url not in rss_urls:
                podcast.delete()

        return metadata
示例#23
0
 def endElement(self, name):
     """
     Handle a closing element; store finished 'node'/'way' elements.

     Elements other than 'node' and 'way', and elements closed while
     ``self.valid`` is false, are ignored. Otherwise the element is skipped
     (optionally counting it in ``self.ignore_count``) when no entity type
     matches its tags, when a way references nodes without a known location,
     when it is a University of Oxford library, or when it is tagged as
     disused / not in use.

     The matching Entity (looked up by its 'osm' identifier) is created or
     updated only if it has no stored OSM metadata or the stored timestamp
     is older than the element's; ``self.create_count``,
     ``self.modify_count`` and ``self.unchanged_count`` record the outcome.
     """
     if name not in ('node', 'way') or not self.valid:
         return

     try:
         types = self.find_types(self.tags)
     except ValueError:
         self.ignore_count += 1
         return

     # Ignore ways that lay partly outside our bounding box
     if name == 'way' and not all(node_id in self.node_locations for node_id in self.nodes):
         return

     # We already have these from OxPoints, so leave them alone.
     if self.tags.get('amenity') == 'library' and self.tags.get('operator') == 'University of Oxford':
         return

     # Ignore disused and under-construction entities
     if self.tags.get('life_cycle', 'in_use') != 'in_use' or self.tags.get('disused') in ('1', 'yes', 'true'):
         return

     # `created` is True only when no entity existed yet (this flag used to
     # be inverted, which swapped the create/modify counters).
     try:
         entity = Entity.objects.get(source=self.source,
                                     _identifiers__scheme='osm',
                                     _identifiers__value=self.id)
         created = False
     except Entity.DoesNotExist:
         entity = Entity(source=self.source)
         created = True

     if 'osm' not in entity.metadata or \
       entity.metadata['osm'].get('attrs', {}).get('timestamp', '') < self.attrs['timestamp']:

         if created:
             self.create_count += 1
         else:
             self.modify_count += 1

         if name == 'node':
             entity.location = Point(self.node_location, srid=4326)
             entity.geometry = entity.location
         elif name == 'way':
             way_points = [self.node_locations[n] for n in self.nodes]
             # A way that ends where it starts is a closed ring
             cls = LinearRing if self.nodes[0] == self.nodes[-1] else LineString
             entity.geometry = cls(way_points, srid=4326)
             # The location of a way is the centre of its bounding box
             min_, max_ = (float('inf'), float('inf')), (float('-inf'), float('-inf'))
             for lon, lat in way_points:
                 min_ = min(min_[0], lon), min(min_[1], lat)
                 max_ = max(max_[0], lon), max(max_[1], lat)
             entity.location = Point( (min_[0]+max_[0])/2 , (min_[1]+max_[1])/2 , srid=4326)
         else:
             raise AssertionError("There should be no other types of entity we're to deal with.")

         names = dict()

         for lang_code, lang_name in settings.LANGUAGES:
             with override(lang_code):

                 if '-' in lang_code:
                     tags_to_try = ('name:%s' % lang_code, 'name:%s' % lang_code.split('-')[0], 'name', 'operator')
                 else:
                     tags_to_try = ('name:%s' % lang_code, 'name', 'operator')

                 # The lookup runs for every language. It used to sit
                 # inside the `else` branch above, so region-qualified
                 # codes silently reused a stale value.
                 title = None
                 for tag_to_try in tags_to_try:
                     if self.tags.get(tag_to_try):
                         title = self.tags.get(tag_to_try)
                         break

                 if title is None:
                     # No usable tag: fall back to a reverse-geocoded name,
                     # then to the raw coordinates.
                     try:
                         title = reverse_geocode(*entity.location)[0]['name']
                         if not title:
                             raise IndexError
                         title = u"↝ %s" % title
                     except IndexError:
                         # NOTE(review): for a way, self.node_location is
                         # presumably the last node parsed rather than the
                         # way's centre - confirm.
                         title = u"↝ %f, %f" % (self.node_location[1], self.node_location[0])

                 names[lang_code] = title

         entity.metadata['osm'] = {
             'attrs': dict(self.attrs),
             # Tag keys are stored with ':' replaced by '_'
             'tags': dict((k.replace(':', '_'), v) for k, v in self.tags.items())
         }
         entity.primary_type = self.entity_types[types[0]]

         entity.save(identifiers={'osm': self.id})

         for lang_code, title in names.items():
             set_name_in_language(entity, lang_code, title=title)

         entity.all_types = [self.entity_types[et] for et in types]
         entity.update_all_types_completion()

     else:
         self.unchanged_count += 1
示例#24
0
    def _import_cif(self, cif):
        """
        Parse a CIF file

        ``cif`` is iterated line by line and every line is dispatched on its
        first two characters:

        * ``QS`` starts a journey (or, with 'D' in column 2, discards it);
        * ``QN``/``ZN`` add a note to the current journey;
        * ``QO``/``QI``/``QT`` add the origin, an intermediate or the final
          stop to the current journey; stops whose entity lookup raises
          Http404 are left out;
        * ``ZL``/``ZD`` build up the route id, ``ZS`` starts a new route;
        * ``ZA`` adds a stop entity to the current route, creating or
          updating an entity of our own source when needed.

        Returns a list of route dicts with the keys 'id', 'number',
        'description', 'stops' and 'journies'.
        """

        # Clear cache once per file - avoid high memory usage
        self._cache = EntityCache()

        # Also reset SQL queries log
        reset_queries()

        routes = []

        this_journey = None

        for line in cif:
            record = line[:2]

            if record == 'QS':
                # Journey header
                if this_journey is not None:
                    routes[-1]['journies'].append(this_journey)
                if line[2] == 'D':
                    this_journey = None
                    continue
                this_journey = {
                    'operator-code': line[3:7],
                    'id': line[7:13],
                    'start-date': self._parse_cif_date(line[13:21]),
                    'end-date': self._parse_cif_date(line[21:29]),
                    'days': weekbool(
                        line[29] == '1',  # Monday
                        line[30] == '1',  # Tuesday
                        line[31] == '1',  # Wednesday
                        line[32] == '1',  # Thursday
                        line[33] == '1',  # Friday
                        line[34] == '1',  # Saturday
                        line[35] == '1',  # Sunday
                    ),
                    'school-holidays': {
                        'S': 'term-time',
                        'H': 'holidays'
                    }.get(line[36], 'all'),
                    'bank-holidays': {
                        'A': 'additional',
                        'B': 'holidays',
                        'X': 'non-holidays'
                    }.get(line[37], 'all'),
                    'route': line[38:42],
                    'vehicle': line[48:56].strip(),
                    'direction': line[64],
                    'notes': [],
                    'stops': [],
                }

            elif record in ('QN', 'ZN'):
                # Notes
                if this_journey is None:
                    # No journey to attach to (e.g. it was discarded above)
                    continue
                this_journey['notes'].append(line[7:])

            elif record == 'QO':
                # Journey start
                if this_journey is None:
                    # Belongs to a discarded journey
                    continue
                try:
                    this_journey['stops'].append({
                        'entity': self._cache['atco:%s' % line[2:14].strip()],
                        'sta': None,
                        'std': self._parse_cif_time(line[14:18]),
                        'activity': 'O',
                        'estimated': line[22] == '0',
                        'fare-stage': line[24] == '1'
                    })
                except Http404:
                    # Unknown stop: leave it out of the journey
                    pass

            elif record == 'QI':
                # Journey intermediate stop
                if this_journey is None:
                    # Belongs to a discarded journey
                    continue
                try:
                    this_journey['stops'].append({
                        'entity': self._cache['atco:%s' % line[2:14].strip()],
                        'sta': self._parse_cif_time(line[14:18]),
                        'std': self._parse_cif_time(line[18:22]),
                        'activity': line[22],
                        'estimated': line[27] == '0',
                        'fare-stage': line[29] == '1'
                    })
                except Http404:
                    # Unknown stop: leave it out of the journey
                    pass

            elif record == 'QT':
                # Journey complete
                if this_journey is None:
                    # Belongs to a discarded journey
                    continue
                try:
                    this_journey['stops'].append({
                        'entity': self._cache['atco:%s' % line[2:14].strip()],
                        'sta': self._parse_cif_time(line[14:18]),
                        'std': None,
                        'activity': 'F',
                        'estimated': line[22] == '0',
                        'fare-stage': line[24] == '1'
                    })
                except Http404:
                    # Unknown stop: leave it out of the journey
                    pass

            elif record == 'ZL':
                # Route ID
                route_id = line[2:]

            elif record == 'ZD':
                # Days route ID
                route_id += line[18:-1]

            elif record == 'ZS':
                # Route

                if this_journey is not None:
                    routes[-1]['journies'].append(this_journey)
                    # The journey now belongs to the previous route; forget
                    # it so the next 'QS' does not add it to the new route
                    # a second time.
                    this_journey = None

                routes.append({
                    'id': route_id,
                    'number': line[10:14].strip(),
                    'description': line[14:-1],
                    'stops': [],
                    'journies': []
                })

            elif record == 'ZA':

                stop_code = line[3:15].strip()

                try:
                    entity = self._cache['atco:%s' % stop_code]
                    if entity.source == self._get_source():
                        # Raise Http404 if this is a bus stop we came up with,
                        # so any name changes, etc, get processed
                        raise Http404()
                except Http404:
                    # Out of zone bus stops with NaPTAN codes
                    try:
                        entity = Entity.objects.get(
                            source=self._get_source(),
                            _identifiers__scheme='atco',
                            _identifiers__value=stop_code)
                    except Entity.DoesNotExist:
                        entity = Entity(source=self._get_source())
                    identifiers = {'atco': stop_code}
                    entity_type = self._entity_type
                    entity.primary_type = entity_type
                    entity.save(identifiers=identifiers)
                    set_name_in_language(entity,
                                         'en',
                                         title=line[15:63].strip())
                    entity.all_types = (entity_type, )
                    entity.update_all_types_completion()
                    entity.save()
                routes[-1]['stops'].append(entity)

        # Flush the journey that was still open when the file ended
        if this_journey is not None:
            routes[-1]['journies'].append(this_journey)

        return routes
示例#25
0
 def add_stop(self, meta, entity_type, source, is_entrance):
     """
     Create or update the Entity for one NaPTAN stop.

     ``meta`` is the dict of values read for the stop; 'atco-code',
     'longitude' and 'latitude' are required, while 'common-name',
     'indicator', 'locality-ref', 'street', 'naptan-code', 'plate-code',
     'crs' and 'tiploc' are used when present. ``entity_type`` becomes the
     primary (and only) type, ``source`` scopes the lookup of an existing
     entity, and ``is_entrance`` is stored on the entity.

     Returns the saved Entity, or None when the stop lies outside
     ``self.areas`` or is marked as unused in the NaPTAN data.
     """

     # Check this entity is in an area
     if self.areas is not None:
         if not any(meta['atco-code'].startswith(area) for area in self.areas):
             return

     # See if we're updating an existing object, or creating a new one
     try:
         entity = Entity.objects.get(source=source,
                                     _identifiers__scheme='atco',
                                     _identifiers__value=meta['atco-code'])
     except Entity.DoesNotExist:
         entity = Entity(source=source)
     except Entity.MultipleObjectsReturned:
         # Handle clashes
         Entity.objects.filter(source=source,
                              _identifiers__scheme='atco',
                              _identifiers__value=meta['atco-code']).delete()
         entity = Entity(source=source)

     common_name, indicator, locality, street = [meta.get(k) for k in
                 ('common-name', 'indicator', 'locality-ref', 'street')]

     name_or_blank = common_name or ''
     unused_markers = ('to define route', 'to def rte', 'to def route', 'def.rte')
     if name_or_blank.endswith(' DEL') or \
        (indicator or '').lower() == 'not in use' or \
        any(marker in name_or_blank for marker in unused_markers):
         # In the NaPTAN list, but indicates it's an unused stop
         return

     if self.meta['stop-type'] in ('MET','GAT','FER', 'RLY'):
         names = self.names
     else:

         names = dict()

         # NOTE(review): `indicator` and `street` are rebound inside this
         # loop, so later languages (and the 'stop' identifier check further
         # down) see values already rewritten for earlier languages -
         # confirm this is intended.
         for lang_code, lang_name in settings.LANGUAGES:
             with override(lang_code):

                 # Try and find one in our preferred order
                 for lang in (lang_code, 'en', None):
                     if lang in self.names:
                         common_name = self.names[lang]
                         break

                 # Expand abbreviations in indicators
                 if indicator is not None:
                     # Built once per language, not once per word
                     expansions = {
                         # Translators: This is referring to bus stop location descriptions
                         'op': ugettext('Opposite'),
                         'opp': ugettext('Opposite'),
                         'opposite': ugettext('Opposite'),
                         # Translators: This is referring to bus stop location descriptions
                         'adj': ugettext('Adjacent'),
                         # Translators: This is referring to bus stop location descriptions
                         'outside': ugettext('Outside'),
                         'o/s': ugettext('Outside'),
                         # Translators: This is referring to bus stop location descriptions
                         'nr': ugettext('Near'),
                         # Translators: This is referring to bus stop location descriptions
                         'inside': ugettext('Inside'),
                         # Translators: This is referring to bus stop location descriptions
                         'stp': ugettext('Stop'),
                     }
                     indicator = ' '.join(expansions.get(part.lower(), part)
                                          for part in indicator.split())

                 if indicator is None and self.meta['stop-type'] in ('AIR', 'FTD', 'RSE', 'TMU', 'BCE'):
                     # Translators: This is referring to public transport entities
                     title = ugettext('Entrance to %s') % common_name

                 elif indicator is None and self.meta['stop-type'] in ('FBT',):
                     # Translators: This is referring to ferry ports
                     title = ugettext('Berth at %s') % common_name

                 elif indicator is None and self.meta['stop-type'] in ('RPL','PLT'):
                     # Translators: This is referring to rail and metro stations
                     title = ugettext('Platform at %s') % common_name

                 elif indicator is not None and indicator.lower() != 'none' \
                     and indicator not in common_name:
                     title = indicator + ' ' + common_name

                 else:
                     title = common_name

                 if street not in (None, '-', '---'):
                     # Deal with all-caps street names
                     if street.upper() == street:
                         # Keep the first character after each single space
                         # as it is and lower-case the rest.
                         fixed = []
                         wordstart = True
                         for letter in street:
                             if wordstart:
                                 wordstart = False
                                 fixed.append(letter)
                             elif letter == ' ':
                                 wordstart = True
                                 fixed.append(letter)
                             else:
                                 fixed.append(letter.lower())
                         street = ''.join(fixed)

                     if street not in title:
                         title += ', ' + street

                 locality_lang = self.nptg_localities.get(locality)
                 if locality_lang is not None:
                     for lang in (lang_code, 'en', 'cy'):
                         if lang in locality_lang:
                             # Don't repeat a locality named like the street
                             if locality_lang[lang] != street:
                                 title += ', ' + locality_lang[lang]
                             break

                 names[lang_code] = title

     entity.primary_type = entity_type
     entity.is_entrance = is_entrance

     if not entity.metadata:
         entity.metadata = {}
     entity.metadata['naptan'] = meta
     entity.location = Point(float(meta['longitude']), float(meta['latitude']), srid=4326)
     entity.geometry = entity.location

     if meta['atco-code'] in self.tube_references:
         entity.metadata['london-underground-identifiers'] = self.tube_references[meta['atco-code']]

     identifiers = {
         'atco': meta['atco-code'],
     }
     if 'naptan-code' in meta:
         # Each character is passed through self.naptan_dial; the result
         # also replaces the value in `meta`.
         meta['naptan-code'] = ''.join(map(self.naptan_dial, meta['naptan-code']))
         identifiers['naptan'] = meta['naptan-code']
     if 'plate-code' in meta:
         identifiers['plate'] = meta['plate-code']
     if 'crs' in meta:
         identifiers['crs'] = meta['crs']
     if 'tiploc' in meta:
         identifiers['tiploc'] = meta['tiploc']
     if indicator is not None and re.match(r'Stop [A-Z]\d\d?', indicator):
         # Everything after the 'Stop ' prefix is the stop identifier
         identifiers['stop'] = indicator[5:]

     entity.save(identifiers=identifiers)

     for lang_code, name in names.items():
         # This is the NaPTAN, so default to English
         if lang_code is None: lang_code = 'en'
         set_name_in_language(entity, lang_code, title=name)

     entity.all_types = (entity_type,)
     entity.update_all_types_completion()
     entity.groups.clear()
     for stop_area in self.stop_areas:
         sa, created = EntityGroup.objects.get_or_create(source=source, ref_code=stop_area)
         entity.groups.add(sa)
     entity.save()

     return entity
示例#26
0
    def add_stop(self, meta, entity_type, source):
        """
        Create or update the Entity for one NaPTAN stop.

        ``meta`` is the dict of values read for the stop; 'atco-code',
        'longitude' and 'latitude' are required, while 'common-name',
        'indicator', 'locality-ref', 'street', 'naptan-code', 'plate-code'
        and 'crs' are used when present. ``entity_type`` becomes the primary
        (and only) type and ``source`` scopes the lookup of an existing
        entity.

        Returns the saved Entity, or None when the stop lies outside
        ``self.areas`` or is marked as unused in the NaPTAN data.
        """

        # Check this entity is in an area
        if self.areas is not None:
            if not any(meta['atco-code'].startswith(area)
                       for area in self.areas):
                return

        # See if we're updating an existing object, or creating a new one
        try:
            entity = Entity.objects.get(source=source,
                                        _identifiers__scheme='atco',
                                        _identifiers__value=meta['atco-code'])
        except Entity.DoesNotExist:
            entity = Entity(source=source)
        except Entity.MultipleObjectsReturned:
            # Handle clashes
            Entity.objects.filter(
                source=source,
                _identifiers__scheme='atco',
                _identifiers__value=meta['atco-code']).delete()
            entity = Entity(source=source)

        common_name, indicator, locality, street = [
            meta.get(k)
            for k in ('common-name', 'indicator', 'locality-ref', 'street')
        ]

        name_or_blank = common_name or ''
        unused_markers = ('to define route', 'to def rte', 'to def route',
                          'def.rte')
        if name_or_blank.endswith(' DEL') or \
           (indicator or '').lower() == 'not in use' or \
           any(marker in name_or_blank for marker in unused_markers):
            # In the NaPTAN list, but indicates it's an unused stop
            return

        if self.meta['stop-type'] in ('MET', 'GAT', 'FER', 'RLY'):
            names = self.names
        else:

            names = dict()

            # NOTE(review): `indicator` and `street` are rebound inside this
            # loop, so later languages (and the 'stop' identifier check
            # further down) see values already rewritten for earlier
            # languages - confirm this is intended.
            for lang_code, lang_name in settings.LANGUAGES:
                with override(lang_code):

                    # Try and find one in our preferred order
                    for lang in (lang_code, 'en', None):
                        if lang in self.names:
                            common_name = self.names[lang]
                            break

                    # Expand abbreviations in indicators
                    if indicator is not None:
                        # Built once per language, not once per word
                        expansions = {
                            # Translators: This is referring to bus stop location descriptions
                            'op': ugettext('Opposite'),
                            'opp': ugettext('Opposite'),
                            'opposite': ugettext('Opposite'),
                            # Translators: This is referring to bus stop location descriptions
                            'adj': ugettext('Adjacent'),
                            # Translators: This is referring to bus stop location descriptions
                            'outside': ugettext('Outside'),
                            'o/s': ugettext('Outside'),
                            # Translators: This is referring to bus stop location descriptions
                            'nr': ugettext('Near'),
                            # Translators: This is referring to bus stop location descriptions
                            'inside': ugettext('Inside'),
                            # Translators: This is referring to bus stop location descriptions
                            'stp': ugettext('Stop'),
                        }
                        indicator = ' '.join(
                            expansions.get(part.lower(), part)
                            for part in indicator.split())

                    if indicator is None and self.meta['stop-type'] in (
                            'AIR', 'FTD', 'RSE', 'TMU', 'BCE'):
                        # Translators: This is referring to public transport entities
                        title = ugettext('Entrance to %s') % common_name

                    elif indicator is None and self.meta['stop-type'] in (
                            'FBT', ):
                        # Translators: This is referring to ferry ports
                        title = ugettext('Berth at %s') % common_name

                    elif indicator is None and self.meta['stop-type'] in (
                            'RPL', 'PLT'):
                        # Translators: This is referring to rail and metro stations
                        title = ugettext('Platform at %s') % common_name

                    elif indicator is not None and indicator.lower() != 'none' \
                        and indicator not in common_name:
                        title = indicator + ' ' + common_name

                    else:
                        title = common_name

                    if street is not None and street != '-':
                        # Deal with all-caps street names
                        if street.upper() == street:
                            # Keep the first character after each single
                            # space as it is and lower-case the rest.
                            fixed = []
                            wordstart = True
                            for letter in street:
                                if wordstart:
                                    wordstart = False
                                    fixed.append(letter)
                                elif letter == ' ':
                                    wordstart = True
                                    fixed.append(letter)
                                else:
                                    fixed.append(letter.lower())
                            street = ''.join(fixed)

                        if street not in title:
                            title += ', ' + street

                    locality_lang = self.nptg_localities.get(locality)
                    if locality_lang is not None:
                        for lang in (lang_code, 'en', 'cy'):
                            if lang in locality_lang:
                                # Don't repeat a locality named like the street
                                if locality_lang[lang] != street:
                                    title += ', ' + locality_lang[lang]
                                break

                    names[lang_code] = title

        entity.primary_type = entity_type

        if not entity.metadata:
            entity.metadata = {}
        entity.metadata['naptan'] = meta
        entity.location = Point(float(meta['longitude']),
                                float(meta['latitude']),
                                srid=4326)
        entity.geometry = entity.location

        if meta['atco-code'] in self.tube_references:
            entity.metadata[
                'london-underground-identifiers'] = self.tube_references[
                    meta['atco-code']]

        identifiers = {
            'atco': meta['atco-code'],
        }
        if 'naptan-code' in meta:
            # Each character is passed through self.naptan_dial; the result
            # also replaces the value in `meta`.
            meta['naptan-code'] = ''.join(
                map(self.naptan_dial, meta['naptan-code']))
            identifiers['naptan'] = meta['naptan-code']
        if 'plate-code' in meta:
            identifiers['plate'] = meta['plate-code']
        if 'crs' in meta:
            identifiers['crs'] = meta['crs']
        if indicator is not None and re.match(r'Stop [A-Z]\d\d?', indicator):
            # Everything after the 'Stop ' prefix is the stop identifier
            identifiers['stop'] = indicator[5:]

        entity.save(identifiers=identifiers)

        for lang_code, name in names.items():
            # This is the NaPTAN, so default to English
            if lang_code is None: lang_code = 'en'
            set_name_in_language(entity, lang_code, title=name)

        entity.all_types = (entity_type, )
        entity.update_all_types_completion()
        entity.groups.clear()
        for stop_area in self.stop_areas:
            sa, created = EntityGroup.objects.get_or_create(source=source,
                                                            ref_code=stop_area)
            entity.groups.add(sa)
        entity.save()

        return entity
示例#27
0
 def _import_cif(self, cif):
     """
     Parse a CIF file
     """
     
     # Clear cache once per file - avoid high memory usage
     self._cache = EntityCache()
     
     # Also reset SQL queries log
     reset_queries()
     
     routes = []
     
     this_journey = None
     
     for line in cif:
         
         if line[:2] == 'QS':
             # Journey header
             if this_journey is not None:
                 routes[-1]['journies'].append(this_journey)
             if line[2] == 'D':
                 this_journey = None
                 continue
             this_journey = {
                 'operator-code': line[3:7],
                 'id': line[7:13],
                 'start-date': self._parse_cif_date(line[13:21]),
                 'end-date': self._parse_cif_date(line[21:29]),
                 'days': weekbool(
                     line[29] == '1', # Monday
                     line[30] == '1', # Tuesday
                     line[31] == '1', # Wednesday
                     line[32] == '1', # Thursday
                     line[33] == '1', # Friday
                     line[34] == '1', # Saturday
                     line[35] == '1', # Sunday
                 ),
                 'school-holidays': {
                     'S': 'term-time',
                     'H': 'holidays'
                 }.get(line[36], 'all'),
                 'bank-holidays': {
                     'A': 'additional',
                     'B': 'holidays',
                     'X': 'non-holidays'
                 }.get(line[37], 'all'),
                 'route': line[38:42],
                 'vehicle': line[48:56].strip(),
                 'direction': line[64],
                 'notes': [],
                 'stops': [],
             }
         
         elif line[:2] in ('QN', 'ZN'):
             # Notes
             this_journey['notes'].append(line[7:])
         
         elif line[:2] == 'QO':
             # Journey start
             try:
                 this_journey['stops'].append({
                     'entity': self._cache['atco:%s' % line[2:14].strip()],
                     'sta': None,
                     'std': self._parse_cif_time(line[14:18]),
                     'activity': 'O',
                     'estimated': line[22] == '0',
                     'fare-stage': line[24] == '1'
                 })
             except Http404:
                 pass
         
         elif line[:2] == 'QI':
             # Journey intermediate stop
             try:
                 this_journey['stops'].append({
                     'entity': self._cache['atco:%s' % line[2:14].strip()],
                     'sta': self._parse_cif_time(line[14:18]),
                     'std': self._parse_cif_time(line[18:22]),
                     'activity': line[22],
                     'estimated': line[27] == '0',
                     'fare-stage': line[29] == '1'
                 })
             except Http404:
                 pass
         
         elif line[:2] == 'QT':
             # Journey complete
             try:
                 this_journey['stops'].append({
                     'entity': self._cache['atco:%s' % line[2:14].strip()],
                     'sta': self._parse_cif_time(line[14:18]),
                     'std': None,
                     'activity': 'F',
                     'estimated': line[22] == '0',
                     'fare-stage': line[24] == '1'
                 })
             except Http404:
                 pass
         
         elif line[:2] == 'ZL':
             # Route ID
             route_id = line[2:]
         
         elif line[:2] == 'ZD':
             # Days route ID
             route_id += line[18:-1]
         
         elif line[:2] == 'ZS':
             # Route
             
             if this_journey is not None:
                 routes[-1]['journies'].append(this_journey)
             
             routes.append({
                 'id': route_id,
                 'number': line[10:14].strip(),
                 'description': line[14:-1],
                 'stops': [],
                 'journies': []
             })
         
         elif line[:2] == 'ZA':
             
             stop_code = line[3:15].strip()
             
             try:
                 entity = self._cache['atco:%s' % stop_code]
                 if entity.source == self._get_source():
                     # Raise Http404 if this is a bus stop we came up with,
                     # so any name changes, etc, get processed
                     raise Http404()
             except Http404:
                 # Out of zone bus stops with NaPTAN codes
                 try:
                     entity = Entity.objects.get(source=self._get_source(),
                                                 _identifiers__scheme='atco',
                                                 _identifiers__value=stop_code)
                 except Entity.DoesNotExist:
                     entity = Entity(source=self._get_source())
                 identifiers = { 'atco': stop_code }
                 entity_type = self._entity_type
                 entity.primary_type = entity_type
                 entity.save(identifiers=identifiers)
                 set_name_in_language(entity, 'en', title=line[15:63].strip())
                 entity.all_types = (entity_type,)
                 entity.update_all_types_completion()
                 entity.save()
             routes[-1]['stops'].append(entity)
     
     if this_journey is not None:
         routes[-1]['journies'].append(this_journey)
     
     return routes
示例#28
0
 def _scrape(self, route, url, output):
     """
     Rebuild the stop list of ``route`` from the table on its route page.

     ``'&showall=1'`` is appended to ``url`` and the page is parsed as HTML.
     The route's existing stops are cleared, then every table row after the
     first is resolved to an entity and stored as a ``StopOnRoute`` whose
     ``order`` is the row's position.

     Rows with no stop code are matched by stop name under the
     ``acisroute`` scheme. Rows with a code use the ``naptan``, ``plate`` or
     ``atco`` scheme depending on the code's prefix; when no entity exists
     for the code, or the existing one belongs to this provider's own
     source, an entity owned by that source is created or refreshed.

     ``output`` is accepted but not used by this method.
     """
     # Imported here so the method does not rely on a module-level import.
     from contextlib import closing

     url += '&showall=1'
     # Release the HTTP response as soon as the page has been parsed
     # instead of leaving it open until garbage collection.
     with closing(urlopen(url)) as response:
         service = etree.parse(response, parser=etree.HTMLParser())
     route.stops.clear()
     for i, tr in enumerate(service.find('.//table').findall('tr')[1:]):
         
         try:
             stop_code = tr[1][0].text
         except IndexError:
             
             # Stops on ACIS Live that don't have codes, e.g., out of county
             # stops
             stop_name = tr[3][0].text
             try:
                 entity = Entity.objects.get(source=self._get_source(),
                                             _identifiers__scheme='acisroute',
                                             _identifiers__value=stop_name)
             except Entity.DoesNotExist:
                 entity = Entity(source=self._get_source())
             
             entity_type = self._get_entity_type()
             entity.primary_type = entity_type
             identifiers = { 'acisroute': stop_name }
             entity.save(identifiers=identifiers)
             set_name_in_language(entity, 'en', title=stop_name)
             entity.all_types = (entity_type,)
             entity.update_all_types_completion()
         
         else:
             if stop_code.startswith(('693', '272', '734', '282')):
                 # Oxontime uses NaPTAN code
                 scheme = 'naptan'
             elif stop_code.startswith('450'):
                 # West Yorkshire uses plate code
                 scheme = 'plate'
             else:
                 # Everyone else uses ATCO
                 scheme = 'atco'
                 if stop_code.startswith('370'):
                     # Except South Yorkshire, which mangles the code
                     stop_code = '3700%s' % stop_code[3:]
             try:
                 entity = get_entity(scheme, stop_code)
                 if entity.source == self._get_source():
                     # Raise Http404 if this is a bus stop we came up with,
                     # so any name changes, etc, get processed
                     raise Http404()
             except Http404:
                 # Out of zone bus stops with NaPTAN codes - alternatively,
                 # the fake bus stops Oxontime made up for the TUBE route
                 try:
                     entity = Entity.objects.get(source=self._get_source(),
                                                 _identifiers__scheme=scheme,
                                                 _identifiers__value=stop_code)
                 except Entity.DoesNotExist:
                     entity = Entity(source=self._get_source())
                 identifiers = {scheme: stop_code}
                 entity_type = self._get_entity_type()
                 entity.primary_type = entity_type
                 entity.save(identifiers=identifiers)
                 set_name_in_language(entity, 'en', title=tr[3][0].text)
                 entity.all_types = (entity_type,)
                 entity.update_all_types_completion()
                 entity.save()
             
         StopOnRoute.objects.create(route=route, entity=entity, order=i)
示例#29
0
    def endElement(self, name):
        """
        Import the OSM ``node`` or ``way`` element that has just ended.

        Nothing is done for other element names, for elements not flagged
        ``valid``, for elements whose tags ``find_types`` rejects with
        ``ValueError`` (counted in ``ignore_count``), for ways referencing a
        node missing from ``node_locations``, or for elements tagged as
        disused / not in use.

        If the OSM id appears in ``self.identities``, the entity it refers to
        is annotated with the OSM metadata, an ``osm`` identifier and the
        matched types. Otherwise the entity owned by ``self.source`` is
        created, or updated when the element's ``timestamp`` attribute is
        newer than the stored one; ``create_count``, ``modify_count`` and
        ``unchanged_count`` record which of those happened.
        """
        if name not in ('node', 'way') or not self.valid:
            return

        try:
            types = self.find_types(self.tags)
        except ValueError:
            self.ignore_count += 1
            return

        # Ignore ways that lay partly outside our bounding box
        if name == 'way' and not all(node_id in self.node_locations
                                     for node_id in self.nodes):
            return

        # Ignore disused and under-construction entities
        not_in_use = self.tags.get('life_cycle', 'in_use') != 'in_use'
        if not_in_use or self.tags.get('disused') in ('1', 'yes', 'true'):
            return

        # Memory management in debug mode
        reset_queries()

        # Stored on the entity in both branches below; ':' in tag keys is
        # replaced with '_' before storing.
        osm_metadata = {
            'attrs': dict(self.attrs),
            'tags': dict((key.replace(':', '_'), value)
                         for key, value in self.tags.items()),
        }

        if self.id in self.identities:
            # This OSM element is mapped onto an entity identified elsewhere
            entity = get_entity(*self.identities[self.id].split(':'))

            entity.metadata['osm'] = osm_metadata

            identifiers = entity.identifiers
            identifiers.update({'osm': self.id})
            entity.save(identifiers=identifiers)
            entity.all_types = set(entity.all_types.all()) | set(
                self.entity_types[et] for et in types)
            entity.update_all_types_completion()
            self.ids.remove(self.id)
            return

        try:
            entity = Entity.objects.get(source=self.source,
                                        _identifiers__scheme='osm',
                                        _identifiers__value=self.id)
            created = False
        except Entity.DoesNotExist:
            entity = Entity(source=self.source)
            created = True

        needs_update = 'osm' not in entity.metadata or \
            entity.metadata['osm'].get('attrs', {}).get('timestamp', '') < self.attrs['timestamp']
        if not needs_update:
            self.unchanged_count += 1
            return

        if created:
            self.create_count += 1
        else:
            self.modify_count += 1

        if name == 'node':
            entity.location = Point(self.node_location, srid=4326)
            entity.geometry = entity.location
        elif name == 'way':
            # A way whose first and last nodes coincide is a closed ring
            cls = LinearRing if self.nodes[0] == self.nodes[-1] else LineString
            points = [self.node_locations[n] for n in self.nodes]
            entity.geometry = cls(points, srid=4326)
            # The way's location is the centre of its bounding box
            lons, lats = zip(*points)
            entity.location = Point((min(lons) + max(lons)) / 2,
                                    (min(lats) + max(lats)) / 2,
                                    srid=4326)
        else:
            raise AssertionError(
                "There should be no other types of entity we're to deal with."
            )

        names = dict()

        for lang_code, lang_name in settings.LANGUAGES:
            with override(lang_code):

                if '-' in lang_code:
                    tags_to_try = ('name:%s' % lang_code,
                                   'name:%s' % lang_code.split('-')[0],
                                   'name', 'operator')
                else:
                    tags_to_try = ('name:%s' % lang_code, 'name', 'operator')

                # The lookup must run for every language code. It is kept in
                # its own variable so the element name passed in as ``name``
                # can never leak through as a title.
                title = None
                for tag_to_try in tags_to_try:
                    if self.tags.get(tag_to_try):
                        title = self.tags.get(tag_to_try)
                        break

                if title is None:
                    try:
                        title = reverse_geocode(*entity.location)[0]['name']
                        if not title:
                            raise IndexError
                        title = u"↝ %s" % title
                    except IndexError:
                        # Use the entity's own location so that ways report
                        # the centre computed above rather than whatever
                        # self.node_location currently holds.
                        title = u"↝ %f, %f" % (entity.location.y,
                                               entity.location.x)

                names[lang_code] = title

        entity.metadata['osm'] = osm_metadata
        entity.primary_type = self.entity_types[types[0]]

        identifiers = entity.identifiers
        identifiers.update({'osm': self.id})
        entity.save(identifiers=identifiers)

        for lang_code, title in names.items():
            set_name_in_language(entity, lang_code, title=title)

        entity.all_types = [self.entity_types[et] for et in types]
        entity.update_all_types_completion()
示例#30
0
    def _scrape(self, route, url, output):
        """
        Rebuild the stop list of ``route`` from the table on its route page.

        ``'&showall=1'`` is appended to ``url`` and the page is parsed as
        HTML. The route's existing stops are cleared, then every table row
        after the first is resolved to an entity and stored as a
        ``StopOnRoute`` whose ``order`` is the row's position.

        Rows with no stop code are matched by stop name under the
        ``acisroute`` scheme. Rows with a code use the ``naptan``, ``plate``
        or ``atco`` scheme depending on the code's prefix; when no entity
        exists for the code, or the existing one belongs to this provider's
        own source, an entity owned by that source is created or refreshed.

        ``output`` is accepted but not used by this method.
        """
        # Imported here so the method does not rely on a module-level import.
        from contextlib import closing

        url += '&showall=1'
        # Release the HTTP response as soon as the page has been parsed
        # instead of leaving it open until garbage collection.
        with closing(urlopen(url)) as response:
            service = etree.parse(response, parser=etree.HTMLParser())
        route.stops.clear()
        for i, tr in enumerate(service.find('.//table').findall('tr')[1:]):

            try:
                stop_code = tr[1][0].text
            except IndexError:

                # Stops on ACIS Live that don't have codes, e.g., out of county
                # stops
                stop_name = tr[3][0].text
                try:
                    entity = Entity.objects.get(
                        source=self._get_source(),
                        _identifiers__scheme='acisroute',
                        _identifiers__value=stop_name)
                except Entity.DoesNotExist:
                    entity = Entity(source=self._get_source())

                entity_type = self._get_entity_type()
                entity.primary_type = entity_type
                identifiers = {'acisroute': stop_name}
                entity.save(identifiers=identifiers)
                set_name_in_language(entity, 'en', title=stop_name)
                entity.all_types = (entity_type, )
                entity.update_all_types_completion()

            else:
                if stop_code.startswith(('693', '272', '734', '282')):
                    # Oxontime uses NaPTAN code
                    scheme = 'naptan'
                elif stop_code.startswith('450'):
                    # West Yorkshire uses plate code
                    scheme = 'plate'
                else:
                    # Everyone else uses ATCO
                    scheme = 'atco'
                    if stop_code.startswith('370'):
                        # Except South Yorkshire, which mangles the code
                        stop_code = '3700%s' % stop_code[3:]
                try:
                    entity = get_entity(scheme, stop_code)
                    if entity.source == self._get_source():
                        # Raise Http404 if this is a bus stop we came up with,
                        # so any name changes, etc, get processed
                        raise Http404()
                except Http404:
                    # Out of zone bus stops with NaPTAN codes - alternatively,
                    # the fake bus stops Oxontime made up for the TUBE route
                    try:
                        entity = Entity.objects.get(
                            source=self._get_source(),
                            _identifiers__scheme=scheme,
                            _identifiers__value=stop_code)
                    except Entity.DoesNotExist:
                        entity = Entity(source=self._get_source())
                    identifiers = {scheme: stop_code}
                    entity_type = self._get_entity_type()
                    entity.primary_type = entity_type
                    entity.save(identifiers=identifiers)
                    set_name_in_language(entity, 'en', title=tr[3][0].text)
                    entity.all_types = (entity_type, )
                    entity.update_all_types_completion()
                    entity.save()

            StopOnRoute.objects.create(route=route, entity=entity, order=i)