def get_schedule_source(self, refresh=False):
    """Loads the schedule source information.

    Loads a schedule source file from either a path or a url
    specified in the config file.

    :param refresh: A boolean forcing a reload instead of using caches.
    :return schedule_source: The schedule read from a file,
        or None when no source is configured.
    """
    # Nothing to load when no schedule source is configured
    if 'schedule_source' not in self.data:
        return None

    source_file = self.data['schedule_source']
    cached_file = self.data['selector'] + '-schedule'

    # Preferably return cached data about schedule
    if refresh is False:
        # Check if _schedule_source data is already present
        if not self._schedule_source:
            # If not, try to get _schedule_source from file cache
            self._schedule_source = Cache.read_file(cached_file)
        # Return cached data if found
        if bool(self._schedule_source):
            return self._schedule_source

    # No cached data was found or refresh was forced
    print("Load schedule source information from " + source_file)

    # Check if local file exists
    if os.path.isfile(source_file):
        # Open file and add to config object
        with open(source_file, 'r') as f:
            schedule_source = f.read()
    else:
        # Check if it is a valid url
        try:
            schedule_source_file = urlopen(source_file)
        except ValueError:
            sys.stderr.write(
                "Error: Couldn't find schedule_source file.\n")
            # Bug fix: exit with a non-zero status so callers and
            # shells can detect the failure (was sys.exit(0)).
            sys.exit(1)
        schedule_source = schedule_source_file.read()

    self._schedule_source = schedule_source

    # Cache data
    Cache.write_file(cached_file, self._schedule_source)
    return self._schedule_source
def get_schedule_source(self, refresh=False):
    """Loads the schedule source information.

    Loads a schedule source file from either a path or a url
    specified in the config file.

    :param refresh: A boolean forcing a reload instead of using caches.
    :return schedule_source: The schedule read from a file,
        or None when no source is configured.
    """
    # Nothing to load when no schedule source is configured
    if 'schedule_source' not in self.data:
        return None

    source_file = self.data['schedule_source']
    cached_file = self.data['selector'] + '-schedule'

    # Preferably return cached data about schedule
    if refresh is False:
        # Check if _schedule_source data is already present
        if not self._schedule_source:
            # If not, try to get _schedule_source from file cache
            self._schedule_source = Cache.read_file(cached_file)
        # Return cached data if found
        if bool(self._schedule_source):
            return self._schedule_source

    # No cached data was found or refresh was forced
    logging.info("Load schedule source information from %s", source_file)

    # Check if local file exists
    if os.path.isfile(source_file):
        # Open file and add to config object
        with open(source_file, 'r') as f:
            schedule_source = f.read()
    else:
        # Check if it is a valid url
        try:
            schedule_source_file = urlopen(source_file)
        except ValueError:
            logging.error("Couldn't find schedule_source file.")
            # Bug fix: exit with a non-zero status so callers and
            # shells can detect the failure (was sys.exit(0)).
            sys.exit(1)
        schedule_source = schedule_source_file.read()

    self._schedule_source = schedule_source

    # Cache data
    Cache.write_file(cached_file, self._schedule_source)
    return self._schedule_source
def test_refresh_routes_cache(self):
    """Forcing a refresh must (re)create the routes cache file
    with the expected number of routes."""
    data = OsmConnector(self.config)
    cache_file = os.path.join(
        self.standard_variables['data_dir'],
        self.selector + "-routes.pkl")
    mocked_overpass_data_file = self.standard_variables[
        'mocked_overpass_routes']

    # Start from a clean slate so the test proves re-creation
    if os.path.isfile(cache_file):
        os.remove(cache_file)

    with patch("osm2gtfs.core.osm_connector.OsmConnector._query_routes"
               ) as mocked1:
        # Bug fix: close the fixture file deterministically instead of
        # leaking the handle from a bare open().read()
        with open(mocked_overpass_data_file, mode='r') as overpass_file:
            overpass_xml = overpass_file.read()
        api = overpy.Overpass()
        mocked1.return_value = api.parse_xml(overpass_xml)
        data.get_routes(refresh=True)

    self.assertTrue(os.path.isfile(cache_file),
                    'The routes cache file creation failed')
    cache = Cache()
    routes = cache.read_data(self.selector + "-routes")
    self.assertEqual(
        len(routes), self.required_variables['routes_count'],
        'Wrong count of routes in the cache file')
def _get_names_for_unnamed_stops(self):
    """Intelligently guess stop names for unnamed stops by
    sourrounding street names and amenities.

    Caches stops with newly guessed names.
    """
    # Loop through all stops
    for stop in self.stops['regular'].values():

        # If there is no name, query one intelligently from OSM
        if stop.name == "[" + self.stop_no_name + "]":
            self._find_best_name_for_unnamed_stop(stop)
            # Idiom fix: lazy %-style logging args defer message
            # construction until the record is actually emitted
            logging.info("* Found alternative stop name: %s - %s",
                         stop.name, stop.osm_url)

    # Cache stops with newly created stop names
    Cache.write_data(self.selector + '-stops', self.stops)
def test_refresh_stops_cache(self):
    """Forcing a refresh must (re)create the stops cache file
    with the expected number of stops."""
    data = OsmConnector(self.config)
    cache_file = os.path.join(self.standard_variables['data_dir'],
                              self.selector + "-stops.pkl")
    mocked_overpass_data_file = self.standard_variables[
        'mocked_overpass_stops']

    # Start from a clean slate so the test proves re-creation
    if os.path.isfile(cache_file):
        os.remove(cache_file)

    with patch("osm2gtfs.core.osm_connector.OsmConnector._query_stops"
               ) as mocked1:
        # Bug fix: close the fixture file deterministically instead of
        # leaking the handle from a bare open().read()
        with open(mocked_overpass_data_file, mode='r') as overpass_file:
            overpass_xml = overpass_file.read()
        api = overpy.Overpass()
        mocked1.return_value = api.parse_xml(overpass_xml)
        data.get_stops(refresh=True)

    self.assertTrue(os.path.isfile(cache_file),
                    'The stops cache file creation failed')
    cache = Cache()
    stops = cache.read_data(self.selector + "-stops")
    amount_of_stops = len(stops['regular']) + len(stops['stations'])
    print("> Amount of osm stops: " + str(amount_of_stops))
    self.assertEqual(
        amount_of_stops, self.required_variables['stops_osm_count'],
        'Wrong count of stops in the cache file')
def test_refresh_routes_cache(self):
    """Forcing a refresh must (re)create the Accra routes cache file
    with the expected number of routes."""
    data = OsmConnector(self.config)
    cache_file = os.path.join(self.data_dir, "accra-routes.pkl")
    mocked_overpass_data_file = os.path.join(self.fixture_folder,
                                             "overpass-routes.xml")

    # Start from a clean slate so the test proves re-creation
    if os.path.isfile(cache_file):
        os.remove(cache_file)

    with patch("osm2gtfs.core.osm_connector.OsmConnector._query_routes"
               ) as mocked1:
        # Bug fix: close the fixture file deterministically instead of
        # leaking the handle from a bare open().read()
        with open(mocked_overpass_data_file, mode='r') as overpass_file:
            overpass_xml = overpass_file.read()
        api = overpy.Overpass()
        mocked1.return_value = api.parse_xml(overpass_xml)
        data.get_routes(refresh=True)

    self.assertTrue(os.path.isfile(cache_file),
                    'The routes cache file creation failed')
    cache = Cache()
    routes = cache.read_data('accra-routes')
    self.assertEqual(len(routes), 277,
                     'Wrong count of routes in the cache file')
def test_refresh_stops_cache(self):
    """Forcing a refresh must (re)create the Accra stops cache file
    with the expected number of stops."""
    data = OsmConnector(self.config)
    cache_file = os.path.join(self.data_dir, "accra-stops.pkl")
    mocked_overpass_data_file = os.path.join(self.fixture_folder,
                                             "overpass-stops.xml")

    # Start from a clean slate so the test proves re-creation
    if os.path.isfile(cache_file):
        os.remove(cache_file)

    with patch("osm2gtfs.core.osm_connector.OsmConnector._query_stops"
               ) as mocked1:
        # Bug fix: close the fixture file deterministically instead of
        # leaking the handle from a bare open().read()
        with open(mocked_overpass_data_file, mode='r') as overpass_file:
            overpass_xml = overpass_file.read()
        api = overpy.Overpass()
        mocked1.return_value = api.parse_xml(overpass_xml)
        data.get_stops(refresh=True)

    self.assertTrue(os.path.isfile(cache_file),
                    'The stops cache file creation failed')
    cache = Cache()
    stops = cache.read_data('accra-stops')
    amount_of_stops = len(stops['regular']) + len(stops['stations'])
    self.assertEqual(amount_of_stops, 2529,
                     'Wrong count of stops in the cache file')
def test_refresh_routes_cache(self):
    """A forced refresh rebuilds the routes cache file and yields the
    expected route count for this creator."""
    connector = OsmConnector(self.config)
    routes_cache = os.path.join(self.standard_variables['data_dir'],
                                self.selector + "-routes.pkl")
    fixture_path = self.standard_variables[
        'mocked_overpass_routes']

    # Remove any stale cache so the test proves re-creation
    if os.path.isfile(routes_cache):
        os.remove(routes_cache)

    with patch("osm2gtfs.core.osm_connector.OsmConnector._query_routes"
               ) as query_mock:
        with open(fixture_path, mode='r') as fixture:
            xml_payload = fixture.read()
        query_mock.return_value = overpy.Overpass().parse_xml(xml_payload)
        connector.get_routes(refresh=True)

    self.assertTrue(os.path.isfile(routes_cache),
                    'The routes cache file creation failed')
    cached_routes = Cache().read_data(self.selector + "-routes")
    # The Florianopolis creator eliminates (nine) routes in the Trips creator.
    # This should be revised. Afterwards this overriden function can be removed.
    self.assertEqual(len(cached_routes),
                     self.required_variables['routes_count'] + 9,
                     'Wrong count of routes in the cache file')
def get_routes(self, refresh=False):
    """The get_routes function returns the data of routes from
    OpenStreetMap converted into usable objects.

    Data about routes is getting obtained from OpenStreetMap through
    the Overpass API, based on the configuration from the config file.

    Then this data gets prepared by building up objects of Line and
    Itinerary objects that are related to each other.

    It uses caching to leverage fast performance and spare the Overpass
    API. Special commands are used to refresh cached data.

    :param self: the own object including it's functions and variables
    :param refresh: A simple boolean indicating a data refresh or use
        of caching if possible.

    :return routes: A dictionary of Line objects with related
        Itinerary objects constituting the tree of data.
    """
    # Preferably return cached data about routes
    if refresh is False:
        # Check if routes data is already built in this object
        if not self.routes:
            # If not, try to get routes data from file cache
            self.routes = Cache.read_data(self.selector + '-routes')
        # Return cached data if found
        if bool(self.routes):
            return self.routes

    # No cached data was found or refresh was forced
    logging.info("Query and build fresh data for routes")

    # Obtain raw data about routes from OpenStreetMap
    result = self._query_routes()

    # Pre-sort relations by type
    route_masters = {}
    route_variants = {}
    for relation in result.relations:
        if relation.tags["type"] == "route_master":
            route_masters[relation.id] = relation
        else:
            route_variants[relation.id] = relation

    # Build routes from master relations
    # Bug fix: dict.iteritems() is Python-2-only; use values() (keys
    # were unused) so this also runs on Python 3.
    for route_master in route_masters.values():
        itineraries = OrderedDict()

        # Build route variant members
        for member in route_master.members:

            # Create Itinerary objects from member route variants
            if member.ref in route_variants:
                rv = route_variants.pop(member.ref)
                itinerary = self._build_itinerary(rv, result,
                                                  route_master)
                if itinerary is not None:
                    itineraries[rv.id] = itinerary

            # Route variant was already used or is not valid
            else:
                rv = result.get_relations(member.ref)
                if bool(rv):
                    rv = rv.pop()
                    logging.warning("Itinerary was assigned again:")
                    logging.warning("https://osm.org/relation/%s", rv.id)
                    itinerary = self._build_itinerary(
                        rv, result, route_master)
                    if itinerary is not None:
                        itineraries[rv.id] = itinerary
                else:
                    logging.warning("Warning: This relation route master:")
                    logging.warning(" https://osm.org/relation/%s",
                                    route_master.id)
                    logging.warning(
                        " has a member which is not a valid itinerary:")
                    logging.warning(" https://osm.org/relation/%s",
                                    member.ref)

        # Create Line object from route master
        line = self._build_line(route_master, itineraries)
        if line is None:
            continue

        # Make sure route_id (ref) number is not already taken
        if line.route_id and line.route_id in [
                elem.route_id for elem in self.routes.values()]:
            logging.warning("'Ref' of route_master already taken")
            logging.warning(" https://osm.org/relation/%s",
                            route_master.id)
            logging.warning(" Skipped. Please fix in OpenStreetMap")
            continue

        self.routes[str(line.osm_id)] = line

    # Build routes from variants (missing master relation)
    for route_variant in route_variants.values():
        logging.warning("Route (variant) without route_master")
        logging.warning(" https://osm.org/relation/%s", route_variant.id)
        logging.warning(" Please fix in OpenStreetMap")
        itinerary = self._build_itinerary(route_variant, result, False)

        # Make sure route_id (ref) number is not already taken
        if itinerary is not None:
            if itinerary.route_id in self.routes:
                logging.warning("Route with existing route_id (ref)")
                logging.warning(" https://osm.org/relation/%s",
                                route_variant.id)
                logging.warning(" Skipped. Please fix in OpenStreetMap")
            else:
                # Create Line from route variant
                itineraries = OrderedDict()
                itineraries[itinerary.osm_id] = itinerary
                line = self._build_line(route_variant, itineraries)
                if line is not None:
                    self.routes[line.route_id] = line

    # Cache data
    Cache.write_data(self.selector + '-routes', self.routes)
    return self.routes
def get_stops(self, refresh=False):
    """Return stops and stop areas from OpenStreetMap as usable objects.

    The raw data is obtained from OpenStreetMap through the Overpass
    API, based on the configuration from the config file, and turned
    into Stop objects (and Station objects for members of a stop_area
    relation). Results are cached on disk to spare the Overpass API;
    pass refresh=True to bypass and rebuild the cache.

    :param self: the own object including it's functions and variables
    :param refresh: A simple boolean indicating a data refresh or use
        of caching if possible.

    :return stops: A dictionary of Stops and Stations constituting the
        obtained data.
    """
    # Serve from cache (memory first, then disk) unless a refresh
    # was explicitly requested
    if not refresh:
        if not self.stops:
            self.stops = Cache.read_data(self.selector + '-stops')
        if bool(self.stops):
            # Maybe check for unnamed stop names
            if self.auto_stop_names:
                self._get_names_for_unnamed_stops()
            return self.stops

    # No cached data was found or refresh was forced
    logging.info("Query and build fresh data for stops")

    # Obtain raw data about stops from OpenStreetMap
    query_result = self._query_stops()
    self.stops['regular'] = {}
    self.stops['stations'] = {}

    # Regular stops come from ways (polygons) first, then nodes
    for element_kind, elements in (("way", query_result.ways),
                                   ("node", query_result.nodes)):
        for element in elements:
            built = self._build_stop(element, element_kind)
            if built:
                key = element_kind + "/" + str(built.osm_id)
                self.stops['regular'][key] = built

    # Stations come from stop_area relations
    for element in query_result.relations:
        station = self._build_station(element, "relation")
        if station:
            # NOTE(review): stations are keyed by the relation's id,
            # while regular stops use the built object's osm_id —
            # confirm the two always match.
            self.stops['stations']["relation/" + str(element.id)] = station

    # Cache data
    Cache.write_data(self.selector + '-stops', self.stops)

    # Maybe check for unnamed stop names
    if self.auto_stop_names:
        self._get_names_for_unnamed_stops()

    return self.stops
def get_schedule_source(self, refresh=False):
    """Loads the schedule source information.

    Either generates the schedule from a frequencies CSV plus a
    generator script given in the config file, or loads a schedule
    source file from a path or a url specified in the config file.

    :param refresh: A boolean forcing a reload instead of using caches.
    :return schedule_source: The schedule read from a file,
        or None when no source is configured.
    """
    cached_file = self.data['selector'] + '-schedule'

    # Bug fix: the original tested `'schedule_script' and
    # 'schedule_csv' in self.data`, which checks the truthiness of the
    # literal string (always True) rather than its presence in the
    # config. Both keys must actually be present.
    if 'schedule_script' in self.data and 'schedule_csv' in self.data:
        logging.info(
            "Generating schedule using frequencies and script given in config file."
        )
        schedule_script = self.data['schedule_script']
        schedule_csv = self.data['schedule_csv']
        header_data = {
            "start_date": self.data['feed_info']['start_date'],
            "end_date": self.data['feed_info']['end_date'],
            "included_lines": [],
            "excluded_lines": []
        }
        # Import the generator script as a module from its own directory
        script_dir = os.path.dirname(schedule_script)
        script_name = os.path.splitext(
            os.path.basename(schedule_script))[0]
        sys.path.insert(0, script_dir)
        gt = importlib.import_module(script_name)
        schedule_json = gt.generate_json(gt.load_csv(schedule_csv),
                                         header_data)
        if 'schedule_source' in self.data:
            logging.info("Writing schedule to {}.".format(
                self.data['schedule_source']))
            gt.write_json(schedule_json, self.data['schedule_source'])
        schedule_source = json.dumps(schedule_json)
    else:
        # No generator configured: load the schedule from file or url
        if 'schedule_source' not in self.data:
            return None
        source_file = self.data['schedule_source']

        # Preferably return cached data about schedule
        if refresh is False:
            # Check if _schedule_source data is already present
            if not self._schedule_source:
                # If not, try to get _schedule_source from file cache
                self._schedule_source = Cache.read_file(cached_file)
            # Return cached data if found
            if bool(self._schedule_source):
                return self._schedule_source

        # No cached data was found or refresh was forced
        logging.info("Load schedule source information from %s",
                     source_file)

        # Check if local file exists
        if os.path.isfile(source_file):
            # Open file and add to config object
            with open(source_file, 'r') as f:
                schedule_source = f.read()
        else:
            # Check if it is a valid url
            try:
                schedule_source_file = urlopen(source_file)
            except ValueError:
                logging.error("Couldn't find schedule_source file.")
                # Bug fix: exit with a non-zero status so callers and
                # shells can detect the failure (was sys.exit(0)).
                sys.exit(1)
            schedule_source = schedule_source_file.read()

    self._schedule_source = schedule_source

    # Cache data
    Cache.write_file(cached_file, self._schedule_source)
    return self._schedule_source