def _get_entity_types(self):
    """
    Make sure every entity type declared in ``self._entity_types`` exists
    and return the ``EntityType`` objects keyed by slug.

    ``self._entity_types`` maps a slug to a definition dict; the keys read
    here are ``category``, ``show_in_nearby_list``, ``show_in_category_list``,
    ``verbose_name``, ``verbose_name_singular``, ``verbose_name_plural`` and
    ``parent-types``.
    """
    by_slug = {}
    handled_slugs = set()

    # First pass: create/update each entity type and its translated names.
    for slug, definition in self._entity_types.items():
        category = EntityTypeCategory.objects.get_or_create(
            name=definition['category'])[0]

        try:
            entity_type = EntityType.objects.get(slug=slug)
        except EntityType.DoesNotExist:
            entity_type = EntityType(slug=slug)
            is_new = True
        else:
            is_new = False

        entity_type.category = category
        entity_type.slug = slug
        if is_new:
            # The list visibility flags are only applied on first creation;
            # existing rows keep whatever values they already have.
            entity_type.show_in_nearby_list = definition['show_in_nearby_list']
            entity_type.show_in_category_list = definition['show_in_category_list']
        entity_type.save()

        for lang_code, lang_name in settings.LANGUAGES:
            with override(lang_code):
                set_name_in_language(
                    entity_type,
                    lang_code,
                    verbose_name=_(definition['verbose_name']),
                    verbose_name_singular=_(definition['verbose_name_singular']),
                    verbose_name_plural=_(definition['verbose_name_plural']),
                )

        handled_slugs.add(slug)
        by_slug[slug] = entity_type

    # Second pass: parent links can only be made once every type exists.
    for slug in handled_slugs:
        parent_slugs = self._entity_types[slug]['parent-types']
        child = by_slug[slug]
        child.subtype_of.clear()
        for parent_slug in parent_slugs:
            child.subtype_of.add(by_slug[parent_slug])
        child.save()

    return by_slug
def import_data(self, metadata, output):
    """
    Import travel alerts from the TPEG feed at ``self._tpeg_url``.

    Every ``tpeg_message`` that carries at least one WGS84 coordinate is
    stored as an ``Entity`` identified by its ``bbc-tpeg`` message id.
    Entities of this source that were not saved during this run are deleted
    afterwards.

    ``metadata`` and ``output`` are accepted but not used by this method.
    """
    source, entity_type = self._get_source(), self._get_entity_type()

    parser = etree.XMLParser(load_dtd=True)
    parser.resolvers.add(BBCTPEGResolver())

    # Close the HTTP response explicitly instead of leaking the socket.
    response = urllib.urlopen(self._tpeg_url)
    try:
        xml = etree.parse(response, parser=parser)
    finally:
        response.close()

    # Index the entities we already hold by their TPEG message id.
    entities, seen = {}, set()
    for entity in Entity.objects.filter(source=source):
        if 'bbc-tpeg' in entity.identifiers:
            entities[entity.identifiers['bbc-tpeg']] = entity

    for message in xml.getroot().findall('tpeg_message'):
        road_traffic_message = message.find('road_traffic_message')
        message_id = road_traffic_message.attrib['message_id']

        try:
            entity = entities[message_id]
        except KeyError:
            entity = Entity()
            entities[message_id] = entity

        entity.source = source
        entity.primary_type = entity_type

        # A real list (rather than map()) so that len() also works where
        # map() returns an iterator.
        locs = [
            self._wgs84_to_point(element)
            for element in road_traffic_message.findall(
                'location_container/location_coordinates/WGS84')
        ]
        if not locs:
            # Messages without coordinates cannot be placed on the map.
            continue
        if len(locs) > 1:
            entity.geometry = LineString(*locs)
        else:
            entity.geometry = locs[0]

        # The location is the arithmetic mean of all points of the message.
        entity.location = Point(
            sum(p.x for p in locs) / len(locs),
            sum(p.y for p in locs) / len(locs),
            srid=4326,
        )

        entity.metadata['bbc_tpeg'] = {
            'xml': etree.tostring(message),
            'severity': road_traffic_message.attrib['severity_factor'],
            'generated': road_traffic_message.attrib['message_generation_time'],
            'version': int(road_traffic_message.attrib['version_number']),
        }
        entity.save(identifiers={'bbc-tpeg': message_id})
        set_name_in_language(entity, 'en', title=message.find('summary').text)
        entity.all_types = [entity_type]
        entity.update_all_types_completion()
        seen.add(entity.pk)

    # Anything from this source that was not saved in this run is stale.
    for entity in Entity.objects.filter(source=source):
        if entity.pk not in seen:
            entity.delete()
def _load_from_csv(self, reader, entity_type, source):
    """
    Create or update one postcode ``Entity`` per row of ``reader``.

    Column 0 of a row holds the postcode and columns 10-11 the easting and
    northing. Rows whose coordinates are not integers are skipped.

    Each entity is stored with two identifiers: ``postcode`` (no space) and
    ``postcode-canonical`` (space inserted when the format is recognised).
    """
    # (pattern, length of the part before the space), tried in this order.
    postcode_formats = [
        (re.compile(r'[A-Z][0-9]{2}[A-Z]{2}'), 2),              # A9 9AA
        (re.compile(r'[A-Z][0-9]{3}[A-Z]{2}'), 3),              # A99 9AA
        (re.compile(r'[A-Z]{2}[0-9]{2}[A-Z]{2}'), 3),           # AA9 9AA
        (re.compile(r'[A-Z]{2}[0-9]{3}[A-Z]{2}'), 4),           # AA99 9AA
        (re.compile(r'[A-Z][0-9][A-Z][0-9][A-Z]{2}'), 3),       # A9A 9AA
        (re.compile(r'[A-Z]{2}[0-9][A-Z][0-9][A-Z]{2}'), 4),    # AA9A 9AA
    ]

    for line in reader:
        postcode_abbrev, (easting, northing) = line[0], line[10:12]
        postcode_abbrev = postcode_abbrev.replace(' ', '')

        # Now try to figure out where to put the space in
        postcode = postcode_abbrev
        for pattern, split_at in postcode_formats:
            if pattern.match(postcode_abbrev):
                postcode = '%s %s' % (postcode_abbrev[:split_at],
                                      postcode_abbrev[split_at:])
                break

        try:
            easting, northing = int(easting), int(northing)
        except ValueError:
            # No usable coordinates for this postcode
            continue

        try:
            entity = Entity.objects.get(source=source,
                                        _identifiers__scheme='postcode',
                                        _identifiers__value=postcode_abbrev)
        except Entity.DoesNotExist:
            entity = Entity(source=source)

        # Eastings/northings are British National Grid coordinates, i.e.
        # EPSG:27700 - not Web Mercator (EPSG:3857), which would place every
        # postcode in the wrong part of the world.
        entity.location = Point(easting, northing, srid=27700)
        entity.geometry = entity.location
        entity.primary_type = entity_type

        identifiers = {
            'postcode': postcode_abbrev,
            'postcode-canonical': postcode,
        }
        entity.save(identifiers=identifiers)
        set_name_in_language(entity, 'en', title=postcode)
        entity.all_types.add(entity_type)
        entity.update_all_types_completion()
def _get_entity_type(self):
    """
    Return the ``post-code`` entity type, creating it (in the
    "Uncategorised" category) when it does not exist yet, and refresh its
    verbose names for every language in ``settings.LANGUAGES``.
    """
    category = EntityTypeCategory.objects.get_or_create(
        name=ugettext_noop('Uncategorised'))[0]
    entity_type, is_new = EntityType.objects.get_or_create(
        slug='post-code', category=category)
    entity_type.slug = 'post-code'

    if is_new:
        # Postcodes are hidden from both listings by default; the flags are
        # only set on first creation.
        entity_type.show_in_nearby_list = False
        entity_type.show_in_category_list = False
    entity_type.save()

    for lang_code, lang_name in settings.LANGUAGES:
        with override(lang_code):
            set_name_in_language(
                entity_type,
                lang_code,
                verbose_name=_('postcode'),
                verbose_name_singular=_('a postcode'),
                verbose_name_plural=_('postcodes'),
            )

    return entity_type
def endElement(self, name):
    """
    Handle a closing tag called ``name``.

    * ``StopPoint``: resolve the entity type for the collected ``self.meta``
      and hand the stop to ``self.add_stop``; stops whose type is unknown
      are ignored.
    * ``StopAreaRef``: move the collected stop area code into
      ``self.stop_areas``.
    * ``StopArea``: create the ``EntityGroup`` for the area (subject to the
      ``self.areas`` prefix filter) and store its names.
    * ``CommonName`` / ``Name``: remember the first name seen for the
      current language.
    """
    self.name_stack.pop()

    if name == "StopPoint":
        try:
            stop_type = self.meta["stop-type"]
            if stop_type == "MET":
                # Classify metro stops according to their particular system
                try:
                    entity_type = self.entity_types[
                        stop_type + ":" + self.meta["atco-code"][6:8]]
                except KeyError:
                    entity_type = self.entity_types["MET"]
            else:
                entity_type = self.entity_types[stop_type]
        except KeyError:
            # Missing or unknown stop type: nothing to import
            return

        entity = self.add_stop(self.meta, entity_type, self.source)
        if entity:
            self.entities.add(entity)
        return

    if name == "StopAreaRef":
        self.stop_areas.append(self.meta.pop("stop-area"))
        return

    if name == "StopArea":
        if self.areas is not None:
            # Only keep areas whose code starts with a configured prefix
            if not any(self.meta["area-code"].startswith(prefix)
                       for prefix in self.areas):
                return

        group, created = EntityGroup.objects.get_or_create(
            source=self.source, ref_code=self.meta["area-code"])
        group.save()
        for lang_code, title in self.names.items():
            # Names without a language are stored as English
            set_name_in_language(
                group, "en" if lang_code is None else lang_code, title=title)
        return

    if name == "CommonName":
        if self.lang not in self.names:
            self.names[self.lang] = self.meta["common-name"]
        return

    if name == "Name" and self.meta["name"] != "":
        if self.lang not in self.names:
            self.names[self.lang] = self.meta["name"]
def _get_entity_types(self):
    """
    Make sure every entity type in ``self.entity_type_definitions`` exists
    in the "Transport" category and return them keyed by stop type.

    Each definition dict is read for ``slug``, ``uri-local``, ``nearby``,
    ``category`` and the three ``verbose-name*`` keys. Every type except
    ``public-transport-access-node`` becomes a subtype of the type stored
    under the key ``None``; the ``MET:*`` types additionally become subtypes
    of ``MET``, unless they use the rail station slug.
    """
    entity_types = {}
    category, category_created = EntityTypeCategory.objects.get_or_create(
        name=_("Transport"))
    category.save()

    for stop_type, et in self.entity_type_definitions.items():
        # Track creation per entity type. Reusing the category's "created"
        # flag meant the list defaults below were only applied when the
        # category itself happened to be new.
        try:
            entity_type = EntityType.objects.get(slug=et["slug"])
            created = False
        except EntityType.DoesNotExist:
            entity_type = EntityType(slug=et["slug"])
            created = True

        entity_type.category = category
        entity_type.uri = "http://mollyproject.org/schema/maps#%s" % et["uri-local"]
        if created:
            # Defaults apply on first creation only; existing rows keep
            # their current values.
            entity_type.show_in_nearby_list = et["nearby"]
            entity_type.show_in_category_list = et["category"]
        entity_type.save()

        for lang_code, lang_name in settings.LANGUAGES:
            with override(lang_code):
                set_name_in_language(
                    entity_type,
                    lang_code,
                    verbose_name=ugettext(et["verbose-name"]),
                    verbose_name_singular=ugettext(et["verbose-name-singular"]),
                    verbose_name_plural=ugettext(et["verbose-name-plural"]),
                )

        entity_types[stop_type] = entity_type

    # Build the hierarchy once all types exist.
    for stop_type, entity_type in entity_types.items():
        if entity_type.slug == "public-transport-access-node":
            continue
        entity_type.subtype_of.add(entity_types[None])
        if (
            stop_type.startswith("MET")
            and stop_type != "MET"
            and entity_type.slug != self.RAIL_STATION_DEFINITION["slug"]
        ):
            entity_type.subtype_of.add(entity_types["MET"])

    return entity_types
def _get_entity_type(self):
    """
    Return the ``travel-alert`` entity type, creating it when necessary,
    attach it to the "Transport" category and refresh its verbose names for
    every language in ``settings.LANGUAGES``.
    """
    try:
        entity_type = EntityType.objects.get(slug='travel-alert')
    except EntityType.DoesNotExist:
        entity_type = EntityType(slug='travel-alert')
        is_new = True
    else:
        is_new = False

    category = EntityTypeCategory.objects.get_or_create(
        name=ugettext_noop('Transport'))[0]

    if is_new:
        # Hidden from both listings by default; only set on first creation.
        entity_type.show_in_nearby_list = False
        entity_type.show_in_category_list = False
    entity_type.category = category
    entity_type.save()

    for lang_code, lang_name in settings.LANGUAGES:
        with override(lang_code):
            set_name_in_language(
                entity_type,
                lang_code,
                verbose_name=_('travel alert'),
                verbose_name_singular=_('a travel alert'),
                verbose_name_plural=_('travel alerts'),
            )

    return entity_type
def endElement(self, name):
    """
    Store the OSM ``node`` or ``way`` that has just been closed.

    Elements are skipped when they are not valid, have no recognised type
    (counted in ``self.ignore_count``), are ways referencing nodes we hold
    no location for, are University of Oxford libraries, or are
    disused/under construction.

    An entity is only (re)written when no OSM metadata is stored for it or
    the stored timestamp is older than the element's; ``create_count``,
    ``modify_count`` and ``unchanged_count`` are updated accordingly.
    """
    if not (name in ('node', 'way') and self.valid):
        return

    try:
        type_slugs = self.find_types(self.tags)
    except ValueError:
        self.ignore_count += 1
        return

    # Ignore ways that lay partly outside our bounding box
    if name == 'way' and not all(node_id in self.node_locations
                                 for node_id in self.nodes):
        return

    # We already have these from OxPoints, so leave them alone.
    if (self.tags.get('amenity') == 'library'
            and self.tags.get('operator') == 'University of Oxford'):
        return

    # Ignore disused and under-construction entities
    if (self.tags.get('life_cycle', 'in_use') != 'in_use'
            or self.tags.get('disused') in ('1', 'yes', 'true')):
        return

    reset_queries()

    # "created" must be True only when the entity did not exist before;
    # the flag used to be inverted, which swapped the two counters below.
    try:
        entity = Entity.objects.get(source=self.source,
                                    _identifiers__scheme='osm',
                                    _identifiers__value=self.id)
        created = False
    except Entity.DoesNotExist:
        entity = Entity(source=self.source)
        created = True

    is_stale = (
        'osm' not in entity.metadata
        or entity.metadata['osm'].get('attrs', {}).get('timestamp', '')
        < self.attrs['timestamp']
    )
    if not is_stale:
        self.unchanged_count += 1
        return

    if created:
        self.create_count += 1
    else:
        self.modify_count += 1

    if name == 'node':
        entity.location = Point(self.node_location, srid=4326)
        entity.geometry = entity.location
    elif name == 'way':
        coords = [self.node_locations[n] for n in self.nodes]
        # A way that ends where it starts is a closed ring
        cls = LinearRing if self.nodes[0] == self.nodes[-1] else LineString
        entity.geometry = cls(coords, srid=4326)
        # The location is the centre of the way's bounding box
        lons = [lon for lon, lat in coords]
        lats = [lat for lon, lat in coords]
        entity.location = Point((min(lons) + max(lons)) / 2,
                                (min(lats) + max(lats)) / 2,
                                srid=4326)
    else:
        raise AssertionError("There should be no other types of entity we're to deal with.")

    names = dict()
    for lang_code, lang_name in settings.LANGUAGES:
        with override(lang_code):
            if '-' in lang_code:
                tags_to_try = ('name:%s' % lang_code,
                               'name:%s' % lang_code.split('-')[0],
                               'name',
                               'operator')
            else:
                tags_to_try = ('name:%s' % lang_code, 'name', 'operator')

            title = None
            for tag_to_try in tags_to_try:
                if self.tags.get(tag_to_try):
                    title = self.tags.get(tag_to_try)
                    break

            if title is None:
                # Unnamed: fall back to a reverse geocoded name, or failing
                # that, the coordinates.
                try:
                    title = reverse_geocode(*entity.location)[0]['name']
                    if not title:
                        raise IndexError
                    title = u"↝ %s" % title
                except IndexError:
                    # Use the entity's own location: self.node_location is
                    # not set by this method for ways, so it cannot be
                    # relied on here.
                    title = u"↝ %f, %f" % (entity.location.y,
                                           entity.location.x)

            names[lang_code] = title

    entity.metadata['osm'] = {
        'attrs': dict(self.attrs),
        # Colons in tag names are replaced with underscores
        'tags': dict((k.replace(':', '_'), v) for k, v in self.tags.items()),
    }
    entity.primary_type = self.entity_types[type_slugs[0]]
    entity.save(identifiers={'osm': self.id})

    for lang_code, title in names.items():
        set_name_in_language(entity, lang_code, title=title)

    entity.all_types = [self.entity_types[et] for et in type_slugs]
    entity.update_all_types_completion()
def add_stop(self, meta, entity_type, source):
    """
    Create or update the ``Entity`` for one NaPTAN stop.

    ``meta`` is the dict of values collected for the stop; it is stored in
    the entity metadata and its ``naptan-code`` is rewritten in place via
    ``self.naptan_dial``. Returns the saved entity, or ``None`` when the stop
    is outside ``self.areas`` or is marked as unused in the data.
    """
    # Check this entity is in an area
    if self.areas is not None:
        if not any(meta["atco-code"].startswith(prefix) for prefix in self.areas):
            return

    atco_code = meta["atco-code"]
    lookup = dict(source=source,
                  _identifiers__scheme="atco",
                  _identifiers__value=atco_code)

    # See if we're updating an existing object, or creating a new one
    try:
        entity = Entity.objects.get(**lookup)
    except Entity.DoesNotExist:
        entity = Entity(source=source)
    except Entity.MultipleObjectsReturned:
        # Handle clashes
        Entity.objects.filter(**lookup).delete()
        entity = Entity(source=source)

    common_name = meta.get("common-name")
    indicator = meta.get("indicator")
    locality = meta.get("locality-ref")
    street = meta.get("street")

    name_text = common_name or ""
    unused_markers = ("to define route", "to def rte", "to def route", "def.rte")
    if (
        name_text.endswith(" DEL")
        or (indicator or "").lower() == "not in use"
        or any(marker in name_text for marker in unused_markers)
    ):
        # In the NaPTAN list, but indicates it's an unused stop
        return

    stop_type = self.meta["stop-type"]

    if stop_type in ("MET", "GAT", "FER", "RLY"):
        names = self.names
    else:
        names = dict()
        # NOTE(review): indicator and street are rebound inside this loop,
        # so each language after the first starts from the previous
        # language's expanded text. Kept as-is; confirm whether intended.
        for lang_code, lang_name in settings.LANGUAGES:
            with override(lang_code):

                # Try and find one in our preferred order
                for lang in (lang_code, "en", None):
                    if lang in self.names:
                        common_name = self.names[lang]
                        break

                # Expand abbreviations in indicators
                if indicator is not None:
                    expansions = {
                        # Translators: This is referring to bus stop location descriptions
                        "op": ugettext("Opposite"),
                        "opp": ugettext("Opposite"),
                        "opposite": ugettext("Opposite"),
                        # Translators: This is referring to bus stop location descriptions
                        "adj": ugettext("Adjacent"),
                        # Translators: This is referring to bus stop location descriptions
                        "outside": ugettext("Outside"),
                        "o/s": ugettext("Outside"),
                        # Translators: This is referring to bus stop location descriptions
                        "nr": ugettext("Near"),
                        # Translators: This is referring to bus stop location descriptions
                        "inside": ugettext("Inside"),
                        # Translators: This is referring to bus stop location descriptions
                        "stp": ugettext("Stop"),
                    }
                    indicator = " ".join(
                        expansions.get(part.lower(), part)
                        for part in indicator.split()
                    )

                if indicator is None and stop_type in ("AIR", "FTD", "RSE", "TMU", "BCE"):
                    # Translators: This is referring to public transport entities
                    title = ugettext("Entrance to %s") % common_name
                elif indicator is None and stop_type in ("FBT",):
                    # Translators: This is referring to ferry ports
                    title = ugettext("Berth at %s") % common_name
                elif indicator is None and stop_type in ("RPL", "PLT"):
                    # Translators: This is referring to rail and metro stations
                    title = ugettext("Platform at %s") % common_name
                elif (indicator is not None
                        and indicator.lower() != "none"
                        and indicator not in common_name):
                    title = indicator + " " + common_name
                else:
                    title = common_name

                if street is not None and street != "-":
                    # Deal with all-caps street names: keep the first
                    # character after each space, lower-case the rest
                    if street.upper() == street:
                        letters = []
                        at_word_start = True
                        for letter in street:
                            if at_word_start:
                                at_word_start = False
                                letters.append(letter)
                            elif letter == " ":
                                at_word_start = True
                                letters.append(letter)
                            else:
                                letters.append(letter.lower())
                        street = "".join(letters)

                    if street not in title:
                        title += ", " + street

                locality_lang = self.nptg_localities.get(locality)
                if locality_lang is not None:
                    for lang in (lang_code, "en", "cy"):
                        if lang in locality_lang:
                            if locality_lang[lang] != street:
                                title += ", " + locality_lang[lang]
                            break

                names[lang_code] = title

    entity.primary_type = entity_type

    if not entity.metadata:
        entity.metadata = {}
    entity.metadata["naptan"] = meta
    entity.location = Point(float(meta["longitude"]), float(meta["latitude"]), srid=4326)
    entity.geometry = entity.location

    if atco_code in self.tube_references:
        entity.metadata["london-underground-identifiers"] = self.tube_references[atco_code]

    identifiers = {"atco": atco_code}
    if "naptan-code" in meta:
        meta["naptan-code"] = "".join(
            self.naptan_dial(character) for character in meta["naptan-code"])
        identifiers["naptan"] = meta["naptan-code"]
    if "plate-code" in meta:
        identifiers["plate"] = meta["plate-code"]
    if "crs" in meta:
        identifiers["crs"] = meta["crs"]
    if indicator is not None and re.match(r"Stop [A-Z]\d\d?", indicator):
        identifiers["stop"] = indicator[5:]

    entity.save(identifiers=identifiers)

    for lang_code, stop_name in names.items():
        # This is the NaPTAN, so default to English
        if lang_code is None:
            lang_code = "en"
        set_name_in_language(entity, lang_code, title=stop_name)

    entity.all_types = (entity_type,)
    entity.update_all_types_completion()

    # Rebuild the stop area memberships from scratch
    entity.groups.clear()
    for stop_area in self.stop_areas:
        group, created = EntityGroup.objects.get_or_create(source=source, ref_code=stop_area)
        entity.groups.add(group)
    entity.save()

    return entity
def _import_cif(self, cif):
    """
    Parse a CIF file

    ``cif`` is an iterable of lines. The two-character record prefix selects
    the handling: ``ZL``/``ZD`` build the route id, ``ZS`` starts a route,
    ``ZA`` adds a stop to the current route, ``QS`` starts a journey,
    ``QO``/``QI``/``QT`` add the origin/intermediate/terminating stop to the
    current journey and ``QN``/``ZN`` add a note to it.

    Returns a list of route dicts (``id``, ``number``, ``description``,
    ``stops``, ``journies``). Journey stops whose entity lookup raises
    ``Http404`` are left out.
    """
    # Clear cache once per file - avoid high memory usage
    self._cache = EntityCache()

    # Also reset SQL queries log
    reset_queries()

    routes = []
    this_journey = None

    for line in cif:
        record_type = line[:2]

        if record_type == 'QS':
            # Journey header
            if this_journey is not None:
                routes[-1]['journies'].append(this_journey)
            if line[2] == 'D':
                # Header with 'D' in column 2: no journey is kept for it
                this_journey = None
                continue
            this_journey = {
                'operator-code': line[3:7],
                'id': line[7:13],
                'start-date': self._parse_cif_date(line[13:21]),
                'end-date': self._parse_cif_date(line[21:29]),
                'days': weekbool(
                    line[29] == '1',  # Monday
                    line[30] == '1',  # Tuesday
                    line[31] == '1',  # Wednesday
                    line[32] == '1',  # Thursday
                    line[33] == '1',  # Friday
                    line[34] == '1',  # Saturday
                    line[35] == '1',  # Sunday
                ),
                'school-holidays': {
                    'S': 'term-time',
                    'H': 'holidays'
                }.get(line[36], 'all'),
                'bank-holidays': {
                    'A': 'additional',
                    'B': 'holidays',
                    'X': 'non-holidays'
                }.get(line[37], 'all'),
                'route': line[38:42],
                'vehicle': line[48:56].strip(),
                'direction': line[64],
                'notes': [],
                'stops': [],
            }

        elif record_type in ('QN', 'ZN'):
            # Notes - only meaningful while a journey is being collected
            if this_journey is not None:
                this_journey['notes'].append(line[7:])

        elif record_type == 'QO':
            # Journey start
            if this_journey is None:
                # Belongs to a skipped journey
                continue
            try:
                this_journey['stops'].append({
                    'entity': self._cache['atco:%s' % line[2:14].strip()],
                    'sta': None,
                    'std': self._parse_cif_time(line[14:18]),
                    'activity': 'O',
                    'estimated': line[22] == '0',
                    'fare-stage': line[24] == '1'
                })
            except Http404:
                pass

        elif record_type == 'QI':
            # Journey intermediate stop
            if this_journey is None:
                continue
            try:
                this_journey['stops'].append({
                    'entity': self._cache['atco:%s' % line[2:14].strip()],
                    'sta': self._parse_cif_time(line[14:18]),
                    'std': self._parse_cif_time(line[18:22]),
                    'activity': line[22],
                    'estimated': line[27] == '0',
                    'fare-stage': line[29] == '1'
                })
            except Http404:
                pass

        elif record_type == 'QT':
            # Journey complete
            if this_journey is None:
                continue
            try:
                this_journey['stops'].append({
                    'entity': self._cache['atco:%s' % line[2:14].strip()],
                    'sta': self._parse_cif_time(line[14:18]),
                    'std': None,
                    'activity': 'F',
                    'estimated': line[22] == '0',
                    'fare-stage': line[24] == '1'
                })
            except Http404:
                pass

        elif record_type == 'ZL':
            # Route ID
            route_id = line[2:]

        elif record_type == 'ZD':
            # Days route ID
            route_id += line[18:-1]

        elif record_type == 'ZS':
            # Route
            if this_journey is not None:
                routes[-1]['journies'].append(this_journey)
                # The journey now belongs to the previous route; forget it
                # so the next QS record does not add it to the new route
                # as well.
                this_journey = None
            routes.append({
                'id': route_id,
                'number': line[10:14].strip(),
                'description': line[14:-1],
                'stops': [],
                'journies': []
            })

        elif record_type == 'ZA':
            stop_code = line[3:15].strip()
            try:
                entity = self._cache['atco:%s' % stop_code]
                if entity.source == self._get_source():
                    # Raise Http404 if this is a bus stop we came up with,
                    # so any name changes, etc, get processed
                    raise Http404()
            except Http404:
                # Out of zone bus stops with NaPTAN codes
                try:
                    entity = Entity.objects.get(source=self._get_source(),
                                                _identifiers__scheme='atco',
                                                _identifiers__value=stop_code)
                except Entity.DoesNotExist:
                    entity = Entity(source=self._get_source())

                identifiers = {'atco': stop_code}
                entity_type = self._entity_type
                entity.primary_type = entity_type
                entity.save(identifiers=identifiers)
                set_name_in_language(entity, 'en', title=line[15:63].strip())
                entity.all_types = (entity_type,)
                entity.update_all_types_completion()
                entity.save()
            routes[-1]['stops'].append(entity)

    # Flush the journey still open at the end of the file
    if this_journey is not None:
        routes[-1]['journies'].append(this_journey)

    return routes
def _scrape(self, route, url, output):
    """
    Rebuild the ordered stop list of ``route`` from the HTML page at
    ``url`` (requested with ``&showall=1`` appended).

    The rows of the first table are read after skipping the header row.
    Each row is resolved to an ``Entity`` - creating one owned by this
    provider when needed - and linked to the route with a ``StopOnRoute``
    whose ``order`` is the row index.

    ``output`` is accepted but not used by this method.
    """
    url += '&showall=1'

    # Close the HTTP response explicitly instead of leaking the socket.
    response = urlopen(url)
    try:
        service = etree.parse(response, parser=etree.HTMLParser())
    finally:
        response.close()

    route.stops.clear()

    for i, tr in enumerate(service.find('.//table').findall('tr')[1:]):

        try:
            stop_code = tr[1][0].text
        except IndexError:
            # Stops on ACIS Live that don't have codes, e.g., out of county
            # stops
            stop_name = tr[3][0].text
            try:
                entity = Entity.objects.get(source=self._get_source(),
                                            _identifiers__scheme='acisroute',
                                            _identifiers__value=stop_name)
            except Entity.DoesNotExist:
                entity = Entity(source=self._get_source())

            entity_type = self._get_entity_type()
            entity.primary_type = entity_type
            identifiers = {'acisroute': stop_name}
            entity.save(identifiers=identifiers)
            set_name_in_language(entity, 'en', title=stop_name)
            entity.all_types = (entity_type,)
            entity.update_all_types_completion()

        else:
            if stop_code.startswith(('693', '272', '734', '282')):
                # Oxontime uses NaPTAN code
                scheme = 'naptan'
            elif stop_code.startswith('450'):
                # West Yorkshire uses plate code
                scheme = 'plate'
            else:
                # Everyone else uses ATCO
                scheme = 'atco'
                if stop_code.startswith('370'):
                    # Except South Yorkshire, which mangles the code
                    stop_code = '3700%s' % stop_code[3:]

            try:
                entity = get_entity(scheme, stop_code)
                if entity.source == self._get_source():
                    # Raise Http404 if this is a bus stop we came up with,
                    # so any name changes, etc, get processed
                    raise Http404()
            except Http404:
                # Out of zone bus stops with NaPTAN codes - alternatively,
                # the fake bus stops Oxontime made up for the TUBE route
                try:
                    entity = Entity.objects.get(source=self._get_source(),
                                                _identifiers__scheme=scheme,
                                                _identifiers__value=stop_code)
                except Entity.DoesNotExist:
                    entity = Entity(source=self._get_source())

                identifiers = {scheme: stop_code}
                entity_type = self._get_entity_type()
                entity.primary_type = entity_type
                entity.save(identifiers=identifiers)
                set_name_in_language(entity, 'en', title=tr[3][0].text)
                entity.all_types = (entity_type,)
                entity.update_all_types_completion()
                entity.save()

        StopOnRoute.objects.create(route=route, entity=entity, order=i)