Пример #1
0
    def get_schedule_source(self, refresh=False):
        """Load the schedule source information.

        Reads the schedule either from a local path or from a URL given by
        the ``schedule_source`` entry of the configuration data. The result
        is stored on the object and written to the file cache.

        :param refresh: When exactly ``False`` (the default), data already
            held on the object or found in the file cache is returned if
            available. Any other value forces a fresh read of the source.

        :return schedule_source: The schedule read from the file or URL, or
            ``None`` when no ``schedule_source`` is configured.

        """

        if 'schedule_source' not in self.data:
            return None

        source_file = self.data['schedule_source']
        cached_file = self.data['selector'] + '-schedule'

        # Preferably return cached data about schedule
        if refresh is False:
            # Check if _schedule_source data is already present
            if not self._schedule_source:
                # If not, try to get _schedule_source from file cache
                self._schedule_source = Cache.read_file(cached_file)
            # Return cached data if found
            if self._schedule_source:
                return self._schedule_source

        # No cached data was found or refresh was forced
        print("Load schedule source information from " + source_file)

        if os.path.isfile(source_file):
            # Local file: read it completely
            with open(source_file, 'r') as f:
                schedule_source = f.read()
        else:
            # Not a local file, so try to use the value as a URL
            try:
                schedule_source_file = urlopen(source_file)
            except ValueError:
                sys.stderr.write(
                    "Error: Couldn't find schedule_source file.\n")
                # Terminate with a failure status so that callers and shell
                # scripts can detect the error
                sys.exit(1)
            # Always release the connection, even if reading fails
            try:
                schedule_source = schedule_source_file.read()
            finally:
                schedule_source_file.close()

        self._schedule_source = schedule_source

        # Cache data
        Cache.write_file(cached_file, self._schedule_source)
        return self._schedule_source
Пример #2
0
    def get_schedule_source(self, refresh=False):
        """Load the schedule source information.

        Reads the schedule either from a local path or from a URL given by
        the ``schedule_source`` entry of the configuration data. The result
        is stored on the object and written to the file cache.

        :param refresh: When exactly ``False`` (the default), data already
            held on the object or found in the file cache is returned if
            available. Any other value forces a fresh read of the source.

        :return schedule_source: The schedule read from the file or URL, or
            ``None`` when no ``schedule_source`` is configured.

        """

        if 'schedule_source' not in self.data:
            return None

        source_file = self.data['schedule_source']
        cached_file = self.data['selector'] + '-schedule'

        # Preferably return cached data about schedule
        if refresh is False:
            # Check if _schedule_source data is already present
            if not self._schedule_source:
                # If not, try to get _schedule_source from file cache
                self._schedule_source = Cache.read_file(cached_file)
            # Return cached data if found
            if self._schedule_source:
                return self._schedule_source

        # No cached data was found or refresh was forced
        logging.info("Load schedule source information from %s", source_file)

        if os.path.isfile(source_file):
            # Local file: read it completely
            with open(source_file, 'r') as f:
                schedule_source = f.read()
        else:
            # Not a local file, so try to use the value as a URL
            try:
                schedule_source_file = urlopen(source_file)
            except ValueError:
                logging.error("Couldn't find schedule_source file.")
                # Terminate with a failure status so that callers and shell
                # scripts can detect the error
                sys.exit(1)
            # Always release the connection, even if reading fails
            try:
                schedule_source = schedule_source_file.read()
            finally:
                schedule_source_file.close()

        self._schedule_source = schedule_source

        # Cache data
        Cache.write_file(cached_file, self._schedule_source)
        return self._schedule_source
Пример #3
0
 def test_refresh_routes_cache(self):
     # Verify that a forced refresh of the routes rebuilds the routes cache
     # file from mocked Overpass data and that the cached data holds the
     # expected number of routes.
     data = OsmConnector(self.config)
     cache_file = os.path.join(
         self.standard_variables['data_dir'], self.selector + "-routes.pkl")
     mocked_overpass_data_file = self.standard_variables['mocked_overpass_routes']
     # Start without a cache file so its creation can be asserted below
     if os.path.isfile(cache_file):
         os.remove(cache_file)
     # Read the fixture with a context manager so the handle gets closed
     with open(mocked_overpass_data_file, mode='r') as overpass_file:
         overpass_xml = overpass_file.read()
     # Replace the Overpass query by the parsed fixture data
     with patch("osm2gtfs.core.osm_connector.OsmConnector._query_routes") as mocked1:
         api = overpy.Overpass()
         mocked1.return_value = api.parse_xml(overpass_xml)
         data.get_routes(refresh=True)
     self.assertTrue(os.path.isfile(cache_file), 'The routes cache file creation failed')
     cache = Cache()
     routes = cache.read_data(self.selector + "-routes")
     self.assertEqual(
         len(routes), self.required_variables['routes_count'],
         'Wrong count of routes in the cache file')
Пример #4
0
    def _get_names_for_unnamed_stops(self):
        """Guess names for regular stops that still carry the placeholder
        name.

        Every regular stop whose name equals the bracketed ``stop_no_name``
        placeholder is handed to ``_find_best_name_for_unnamed_stop`` (per
        the original documentation this guesses a name from surrounding
        street names and amenities).

        Caches stops with newly guessed names.

        """
        # Name carried by stops that have no real name yet
        unnamed_marker = "[" + self.stop_no_name + "]"

        # Loop through all stops
        for stop in self.stops['regular'].values():

            # If there is no name, query one intelligently from OSM
            if stop.name == unnamed_marker:
                self._find_best_name_for_unnamed_stop(stop)
                # Lazy formatting arguments: no failure when name or URL is
                # not a string
                logging.info("* Found alternative stop name: %s - %s",
                             stop.name, stop.osm_url)

                # Cache stops after every newly created stop name
                # NOTE(review): presumably written per stop so that progress
                # survives an interrupted run - confirm
                Cache.write_data(self.selector + '-stops', self.stops)
Пример #5
0
 def test_refresh_routes_cache(self):
     # Verify that a forced refresh of the routes rebuilds the routes cache
     # file from mocked Overpass data and that the cached data holds the
     # expected number of routes.
     data = OsmConnector(self.config)
     cache_file = os.path.join(
         self.standard_variables['data_dir'], self.selector + "-routes.pkl")
     mocked_overpass_data_file = self.standard_variables['mocked_overpass_routes']
     # Start without a cache file so its creation can be asserted below
     if os.path.isfile(cache_file):
         os.remove(cache_file)
     # Read the fixture with a context manager so the handle gets closed
     with open(mocked_overpass_data_file, mode='r') as overpass_file:
         overpass_xml = overpass_file.read()
     # Replace the Overpass query by the parsed fixture data
     with patch("osm2gtfs.core.osm_connector.OsmConnector._query_routes") as mocked1:
         api = overpy.Overpass()
         mocked1.return_value = api.parse_xml(overpass_xml)
         data.get_routes(refresh=True)
     self.assertTrue(os.path.isfile(cache_file), 'The routes cache file creation failed')
     cache = Cache()
     routes = cache.read_data(self.selector + "-routes")
     self.assertEqual(
         len(routes), self.required_variables['routes_count'],
         'Wrong count of routes in the cache file')
Пример #6
0
 def test_refresh_stops_cache(self):
     # Verify that a forced refresh of the stops rebuilds the stops cache
     # file from mocked Overpass data and that the cached data holds the
     # expected number of stops (regular stops plus stations).
     data = OsmConnector(self.config)
     cache_file = os.path.join(self.standard_variables['data_dir'], self.selector + "-stops.pkl")
     mocked_overpass_data_file = self.standard_variables['mocked_overpass_stops']
     # Start without a cache file so its creation can be asserted below
     if os.path.isfile(cache_file):
         os.remove(cache_file)
     # Read the fixture with a context manager so the handle gets closed
     with open(mocked_overpass_data_file, mode='r') as overpass_file:
         overpass_xml = overpass_file.read()
     # Replace the Overpass query by the parsed fixture data
     with patch("osm2gtfs.core.osm_connector.OsmConnector._query_stops") as mocked1:
         api = overpy.Overpass()
         mocked1.return_value = api.parse_xml(overpass_xml)
         data.get_stops(refresh=True)
     self.assertTrue(os.path.isfile(cache_file), 'The stops cache file creation failed')
     cache = Cache()
     stops = cache.read_data(self.selector + "-stops")
     amount_of_stops = len(stops['regular']) + len(stops['stations'])
     print("> Amount of osm stops: " + str(amount_of_stops))
     self.assertEqual(
         amount_of_stops, self.required_variables['stops_osm_count'],
         'Wrong count of stops in the cache file')
Пример #7
0
 def test_refresh_routes_cache(self):
     # Verify that a forced refresh of the routes rebuilds the Accra routes
     # cache file from mocked Overpass data and that the cached data holds
     # the expected number of routes.
     data = OsmConnector(self.config)
     cache_file = os.path.join(self.data_dir, "accra-routes.pkl")
     mocked_overpass_data_file = os.path.join(self.fixture_folder,
                                              "overpass-routes.xml")
     # Start without a cache file so its creation can be asserted below
     if os.path.isfile(cache_file):
         os.remove(cache_file)
     # Read the fixture with a context manager so the handle gets closed
     with open(mocked_overpass_data_file, mode='r') as overpass_file:
         overpass_xml = overpass_file.read()
     # Replace the Overpass query by the parsed fixture data
     with patch("osm2gtfs.core.osm_connector.OsmConnector._query_routes"
                ) as mocked1:
         api = overpy.Overpass()
         mocked1.return_value = api.parse_xml(overpass_xml)
         data.get_routes(refresh=True)
     self.assertTrue(os.path.isfile(cache_file),
                     'The routes cache file creation failed')
     cache = Cache()
     routes = cache.read_data('accra-routes')
     self.assertEqual(len(routes), 277,
                      'Wrong count of routes in the cache file')
Пример #8
0
 def test_refresh_stops_cache(self):
     # Verify that a forced refresh of the stops rebuilds the stops cache
     # file from mocked Overpass data and that the cached data holds the
     # expected number of stops (regular stops plus stations).
     data = OsmConnector(self.config)
     cache_file = os.path.join(self.standard_variables['data_dir'], self.selector + "-stops.pkl")
     mocked_overpass_data_file = self.standard_variables['mocked_overpass_stops']
     # Start without a cache file so its creation can be asserted below
     if os.path.isfile(cache_file):
         os.remove(cache_file)
     # Read the fixture with a context manager so the handle gets closed
     with open(mocked_overpass_data_file, mode='r') as overpass_file:
         overpass_xml = overpass_file.read()
     # Replace the Overpass query by the parsed fixture data
     with patch("osm2gtfs.core.osm_connector.OsmConnector._query_stops") as mocked1:
         api = overpy.Overpass()
         mocked1.return_value = api.parse_xml(overpass_xml)
         data.get_stops(refresh=True)
     self.assertTrue(os.path.isfile(cache_file), 'The stops cache file creation failed')
     cache = Cache()
     stops = cache.read_data(self.selector + "-stops")
     amount_of_stops = len(stops['regular']) + len(stops['stations'])
     print("> Amount of osm stops: " + str(amount_of_stops))
     self.assertEqual(
         amount_of_stops, self.required_variables['stops_osm_count'],
         'Wrong count of stops in the cache file')
Пример #9
0
 def test_refresh_stops_cache(self):
     # Verify that a forced refresh of the stops rebuilds the Accra stops
     # cache file from mocked Overpass data and that the cached data holds
     # the expected number of stops (regular stops plus stations).
     data = OsmConnector(self.config)
     cache_file = os.path.join(self.data_dir, "accra-stops.pkl")
     mocked_overpass_data_file = os.path.join(self.fixture_folder,
                                              "overpass-stops.xml")
     # Start without a cache file so its creation can be asserted below
     if os.path.isfile(cache_file):
         os.remove(cache_file)
     # Read the fixture with a context manager so the handle gets closed
     with open(mocked_overpass_data_file, mode='r') as overpass_file:
         overpass_xml = overpass_file.read()
     # Replace the Overpass query by the parsed fixture data
     with patch("osm2gtfs.core.osm_connector.OsmConnector._query_stops"
                ) as mocked1:
         api = overpy.Overpass()
         mocked1.return_value = api.parse_xml(overpass_xml)
         data.get_stops(refresh=True)
     self.assertTrue(os.path.isfile(cache_file),
                     'The stops cache file creation failed')
     cache = Cache()
     stops = cache.read_data('accra-stops')
     amount_of_stops = len(stops['regular']) + len(stops['stations'])
     self.assertEqual(amount_of_stops, 2529,
                      'Wrong count of stops in the cache file')
Пример #10
0
 def test_refresh_routes_cache(self):
     # Verify that a forced refresh of the routes rebuilds the routes cache
     # file from mocked Overpass data and check the number of cached routes.
     connector = OsmConnector(self.config)
     routes_cache_path = os.path.join(
         self.standard_variables['data_dir'], self.selector + "-routes.pkl")
     fixture_path = self.standard_variables['mocked_overpass_routes']

     # Remove a cache file left over from an earlier run
     if os.path.isfile(routes_cache_path):
         os.remove(routes_cache_path)

     # Serve the fixture data instead of querying Overpass
     query_target = "osm2gtfs.core.osm_connector.OsmConnector._query_routes"
     with patch(query_target) as query_mock, \
             open(fixture_path, mode='r') as fixture:
         raw_xml = fixture.read()
         overpass = overpy.Overpass()
         query_mock.return_value = overpass.parse_xml(raw_xml)
         connector.get_routes(refresh=True)

     self.assertTrue(os.path.isfile(routes_cache_path),
                     'The routes cache file creation failed')

     cached_routes = Cache().read_data(self.selector + "-routes")
     # The Florianopolis creator eliminates (nine) routes in the Trips
     # creator. This should be revised. Afterwards this overridden function
     # can be removed.
     expected_count = self.required_variables['routes_count'] + 9
     self.assertEqual(len(cached_routes), expected_count,
                      'Wrong count of routes in the cache file')
Пример #11
0
    def get_routes(self, refresh=False):
        """The get_routes function returns the data of routes from
        OpenStreetMap converted into usable objects.

        Raw data is obtained through ``_query_routes``. Relations tagged
        ``type=route_master`` are turned into lines via ``_build_line``,
        their member relations into itineraries via ``_build_itinerary``.
        Route relations that are not claimed by any route master are turned
        into a line of their own.

        It uses caching to leverage fast performance and spare the Overpass
        API. Special commands are used to refresh cached data.

        :param self: the own object including its functions and variables
        :param refresh: When exactly ``False`` (the default), routes already
            held on the object or found in the file cache are returned if
            available. Any other value forces a rebuild.

        :return routes: A dictionary of the built line objects with their
            related itinerary objects.

        """
        # Preferably return cached data about routes
        if refresh is False:
            # Check if routes data is already built in this object
            if not self.routes:
                # If not, try to get routes data from file cache
                self.routes = Cache.read_data(self.selector + '-routes')
            # Return cached data if found
            if self.routes:
                return self.routes

        # No cached data was found or refresh was forced
        # NOTE(review): self.routes is not emptied before rebuilding, so
        # entries that are already present stay in place and take part in
        # the duplicate checks below - confirm this is intended for refresh
        logging.info("Query and build fresh data for routes")

        # Obtain raw data about routes from OpenStreetMap
        result = self._query_routes()

        # Pre-sort relations by type
        route_masters = {}
        route_variants = {}
        for relation in result.relations:
            if relation.tags["type"] == "route_master":
                route_masters[relation.id] = relation
            else:
                route_variants[relation.id] = relation

        # Build routes from master relations; values() works on Python 2
        # and Python 3 alike and the keys are not needed here
        for route_master in route_masters.values():
            itineraries = OrderedDict()

            # Build route variant members
            for member in route_master.members:

                # Create Itinerary objects from member route variants
                if member.ref in route_variants:
                    # Popping marks the variant as used, so it is not built
                    # again as a stand-alone route further below
                    rv = route_variants.pop(member.ref)
                    itinerary = self._build_itinerary(rv, result, route_master)
                    if itinerary is not None:
                        itineraries[rv.id] = itinerary

                # Route variant was already used or is not valid
                else:
                    rv = result.get_relations(member.ref)
                    if rv:
                        rv = rv.pop()
                        logging.warning("Itinerary was assigned again:")
                        logging.warning("https://osm.org/relation/%s", rv.id)
                        itinerary = self._build_itinerary(
                            rv, result, route_master)
                        if itinerary is not None:
                            itineraries[rv.id] = itinerary
                    else:
                        logging.warning("Warning: This relation route master:")
                        logging.warning(" https://osm.org/relation/%s",
                                        route_master.id)
                        logging.warning(
                            " has a member which is not a valid itinerary:")
                        logging.warning(" https://osm.org/relation/%s",
                                        member.ref)

            # Create Line object from route master
            line = self._build_line(route_master, itineraries)

            if line is None:
                continue

            # Make sure route_id (ref) number is not already taken
            if line.route_id and any(
                    elem.route_id == line.route_id
                    for elem in self.routes.values()):
                logging.warning("'Ref' of route_master already taken")
                logging.warning(" https://osm.org/relation/%s",
                                route_master.id)
                logging.warning(" Skipped. Please fix in OpenStreetMap")
                continue

            self.routes[str(line.osm_id)] = line

        # Build routes from variants (missing master relation)
        for route_variant in route_variants.values():
            logging.warning("Route (variant) without route_master")
            logging.warning(" https://osm.org/relation/%s", route_variant.id)
            logging.warning(" Please fix in OpenStreetMap")
            itinerary = self._build_itinerary(route_variant, result, False)

            # Make sure route_id (ref) number is not already taken
            if itinerary is not None:
                # NOTE(review): this compares the route_id with the keys of
                # self.routes, but lines built from route masters are keyed
                # by their OSM id above - verify which key is intended
                if itinerary.route_id in self.routes:
                    logging.warning("Route with existing route_id (ref)")
                    logging.warning(" https://osm.org/relation/%s",
                                    route_variant.id)
                    logging.warning(" Skipped. Please fix in OpenStreetMap")
                else:
                    # Create Line from route variant
                    itineraries = OrderedDict()
                    itineraries[itinerary.osm_id] = itinerary
                    line = self._build_line(route_variant, itineraries)
                    if line is not None:
                        self.routes[line.route_id] = line

        # Cache data
        Cache.write_data(self.selector + '-routes', self.routes)

        return self.routes
Пример #12
0
    def get_stops(self, refresh=False):
        """Return the stops and stations obtained from OpenStreetMap as
        usable objects.

        Raw data comes from ``_query_stops``. Ways and nodes are converted
        with ``_build_stop`` and stored under ``'regular'``; relations are
        converted with ``_build_station`` and stored under ``'stations'``.

        The result is kept on the object and in the file cache, so that the
        Overpass API is only queried when nothing is cached or a refresh is
        requested.

        :param self: the own object including its functions and variables
        :param refresh: When exactly ``False`` (the default), cached stops
            are returned if available. Any other value forces a rebuild.

        :return stops: A dictionary with the keys ``'regular'`` and
            ``'stations'`` holding the built objects.

        """

        if refresh is False:
            # Fall back to the file cache when nothing is loaded yet
            if not self.stops:
                self.stops = Cache.read_data(self.selector + '-stops')

            if self.stops:
                # Cached data is available: optionally fill in missing
                # names, then hand it out
                if self.auto_stop_names:
                    self._get_names_for_unnamed_stops()
                return self.stops

        # Nothing cached, or a refresh was requested
        logging.info("Query and build fresh data for stops")

        # Obtain raw data about stops from OpenStreetMap
        result = self._query_stops()
        self.stops['regular'] = {}
        self.stops['stations'] = {}

        # Regular stops: ways (polygons) first, nodes afterwards
        for osm_type, collection in (("way", "ways"), ("node", "nodes")):
            for element in getattr(result, collection):
                built_stop = self._build_stop(element, osm_type)
                if built_stop:
                    stop_key = "/".join((osm_type, str(built_stop.osm_id)))
                    self.stops['regular'][stop_key] = built_stop

        # Stations are built from the stop_area relations
        for area in result.relations:
            station = self._build_station(area, "relation")
            if station:
                station_key = "/".join(("relation", str(area.id)))
                self.stops['stations'][station_key] = station

        # Persist the freshly built data
        Cache.write_data(self.selector + '-stops', self.stops)

        # Optionally fill in missing stop names
        if self.auto_stop_names:
            self._get_names_for_unnamed_stops()

        return self.stops
Пример #13
0
    def get_schedule_source(self, refresh=False):
        """Load or generate the schedule source information.

        When both ``schedule_script`` and ``schedule_csv`` are configured,
        the script is imported as a module and used to generate the schedule
        from the CSV file; in that case ``refresh`` is not consulted. If
        ``schedule_source`` is configured as well, the generated schedule is
        also written there.

        Otherwise the schedule is read from the path or URL given by
        ``schedule_source``.

        The result is stored on the object and written to the file cache.

        :param refresh: When exactly ``False`` (the default) and no schedule
            is generated, data already held on the object or found in the
            file cache is returned if available. Any other value forces a
            fresh read of the source.

        :return schedule_source: The schedule as a string, or ``None`` when
            neither a generator nor a ``schedule_source`` is configured.

        """
        cached_file = self.data['selector'] + '-schedule'

        # Both keys are required. The former test
        # "'schedule_script' and 'schedule_csv' in self.data" only looked
        # for 'schedule_csv', because a non-empty string literal is truthy.
        if 'schedule_script' in self.data and 'schedule_csv' in self.data:

            logging.info(
                "Generating schedule using frequencies and script given in config file."
            )

            schedule_script = self.data['schedule_script']
            schedule_csv = self.data['schedule_csv']
            header_data = {
                "start_date": self.data['feed_info']['start_date'],
                "end_date": self.data['feed_info']['end_date'],
                "included_lines": [],
                "excluded_lines": []
            }

            # Make the script importable by its bare module name
            script_dir = os.path.dirname(schedule_script)
            script_name = os.path.splitext(
                os.path.basename(schedule_script))[0]
            sys.path.insert(0, script_dir)
            gt = importlib.import_module(script_name)
            schedule_json = gt.generate_json(gt.load_csv(schedule_csv),
                                             header_data)

            # Optionally keep a copy of the generated schedule on disk
            if 'schedule_source' in self.data:
                logging.info("Writing schedule to %s.",
                             self.data['schedule_source'])
                gt.write_json(schedule_json, self.data['schedule_source'])
            schedule_source = json.dumps(schedule_json)

        else:
            if 'schedule_source' not in self.data:
                return None

            source_file = self.data['schedule_source']

            # Preferably return cached data about schedule
            if refresh is False:
                # Check if _schedule_source data is already present
                if not self._schedule_source:
                    # If not, try to get _schedule_source from file cache
                    self._schedule_source = Cache.read_file(cached_file)
                # Return cached data if found
                if self._schedule_source:
                    return self._schedule_source

            # No cached data was found or refresh was forced
            logging.info("Load schedule source information from %s",
                         source_file)

            if os.path.isfile(source_file):
                # Local file: read it completely
                with open(source_file, 'r') as f:
                    schedule_source = f.read()
            else:
                # Not a local file, so try to use the value as a URL
                try:
                    schedule_source_file = urlopen(source_file)
                except ValueError:
                    logging.error("Couldn't find schedule_source file.")
                    # Terminate with a failure status so that callers and
                    # shell scripts can detect the error
                    sys.exit(1)
                # Always release the connection, even if reading fails
                try:
                    schedule_source = schedule_source_file.read()
                finally:
                    schedule_source_file.close()

        self._schedule_source = schedule_source

        # Cache data
        Cache.write_file(cached_file, self._schedule_source)
        return self._schedule_source