Exemplo n.º 1
0
 def __init__(self, id, service_url, timezone, object_id_tag=None, timeout=10,
              redis_host=None, redis_db=0, redis_port=6379,
              redis_password=None, max_requests_by_second=15,
              redis_namespace='jormungandr.rate_limiter'):
     """
     Store the connector settings and build its circuit breaker and rate limiter.

     :param id: identifier of the realtime system; also used as object_id_tag
         when no object_id_tag is given
     :param service_url: url of the external service
     :param timezone: timezone name, handed to pytz.timezone
     :param object_id_tag: tag used to fetch object codes (defaults to id)
     :param timeout: timeout in seconds
     :param redis_host: when falsy, a FakeRateLimiter is used instead of a
         redis-backed RateLimiter
     :param max_requests_by_second: 'requests' value of the one-second
         condition given to the RateLimiter
     """
     self.rt_system_id = id
     self.service_url = service_url
     self.timeout = timeout  # expressed in seconds
     self.object_id_tag = object_id_tag or id

     # the circuit breaker thresholds come from the application configuration
     config = app.config
     self.breaker = pybreaker.CircuitBreaker(
         fail_max=config['CIRCUIT_BREAKER_MAX_SYNTHESE_FAIL'],
         reset_timeout=config['CIRCUIT_BREAKER_SYNTHESE_TIMEOUT_S'])

     self.timezone = pytz.timezone(timezone)

     if redis_host:
         throttle = [{'requests': max_requests_by_second, 'seconds': 1}]
         self.rate_limiter = RateLimiter(conditions=throttle,
                                         redis_host=redis_host,
                                         redis_port=redis_port,
                                         redis_db=redis_db,
                                         redis_password=redis_password,
                                         redis_namespace=redis_namespace)
     else:
         # no redis configured: fall back on the fake limiter
         self.rate_limiter = FakeRateLimiter()
Exemplo n.º 2
0
    def __init__(self, id, service_url, service_args, timezone, object_id_tag=None,
                 destination_id_tag=None, instance=None, timeout=10, **kwargs):
        """
        Store the connector settings and build its circuit breaker and rate limiter.

        Optional keyword arguments read from kwargs: redis_host, rate_limit_count,
        rate_limit_duration (default 1), redis_port (default 6379), redis_db (default 0),
        redis_password and redis_namespace (default 'jormungandr.rate_limiter').
        A redis-backed RateLimiter is only created when both redis_host and
        rate_limit_count are truthy; otherwise a FakeRateLimiter is used.
        """
        self.rt_system_id = id
        self.service_url = service_url
        self.service_args = service_args
        self.timeout = timeout  # expressed in seconds
        self.object_id_tag = object_id_tag or id
        self.destination_id_tag = destination_id_tag
        self.instance = instance

        config = app.config
        self.breaker = pybreaker.CircuitBreaker(
            fail_max=config['CIRCUIT_BREAKER_MAX_TIMEO_FAIL'],
            reset_timeout=config['CIRCUIT_BREAKER_TIMEO_TIMEOUT_S'])

        # Note: if the timezone is not known, pytz raises an error
        self.timezone = pytz.timezone(timezone)

        redis_host = kwargs.get('redis_host')
        rate_limit_count = kwargs.get('rate_limit_count')
        if not (redis_host and rate_limit_count):
            self.rate_limiter = FakeRateLimiter()
            return

        throttle = [{'requests': rate_limit_count,
                     'seconds': kwargs.get('rate_limit_duration', 1)}]
        self.rate_limiter = RateLimiter(
            conditions=throttle,
            redis_host=redis_host,
            redis_port=kwargs.get('redis_port', 6379),
            redis_db=kwargs.get('redis_db', 0),
            redis_password=kwargs.get('redis_password'),
            redis_namespace=kwargs.get('redis_namespace', 'jormungandr.rate_limiter'))
Exemplo n.º 3
0
    def __init__(self, id, service_url, service_args, timezone, object_id_tag=None,
                 destination_id_tag=None, instance=None, timeout=10, **kwargs):
        """
        Store the connector settings and build its circuit breaker and rate limiter.

        Optional keyword arguments read from kwargs:
         * circuit_breaker_max_fail / circuit_breaker_reset_timeout: override the
           circuit breaker values taken from the application configuration
         * from_datetime_step: overrides the 'TIMEOUT_TIMEO' cache configuration value
           (60 when that one is missing)
         * redis_host, rate_limit_count, rate_limit_duration, redis_port, redis_db,
           redis_password, redis_namespace: rate limiter settings; a redis-backed
           RateLimiter is only created when redis_host and rate_limit_count are truthy
        """
        self.rt_system_id = id
        self.service_url = service_url
        self.service_args = service_args
        self.timeout = timeout  # expressed in seconds
        self.object_id_tag = object_id_tag or id
        self.destination_id_tag = destination_id_tag
        self.instance = instance

        # the configuration values are always looked up, even when overridden by kwargs
        config_fail_max = app.config['CIRCUIT_BREAKER_MAX_TIMEO_FAIL']
        fail_max = kwargs.get('circuit_breaker_max_fail', config_fail_max)
        config_reset_timeout = app.config['CIRCUIT_BREAKER_TIMEO_TIMEOUT_S']
        reset_timeout = kwargs.get('circuit_breaker_reset_timeout', config_reset_timeout)
        self.breaker = pybreaker.CircuitBreaker(fail_max=fail_max, reset_timeout=reset_timeout)

        # A step is applied on from_datetime to discretize calls and allow caching them
        config_step = app.config['CACHE_CONFIGURATION'].get('TIMEOUT_TIMEO', 60)
        self.from_datetime_step = kwargs.get('from_datetime_step', config_step)

        # Note: if the timezone is not known, pytz raises an error
        self.timezone = pytz.timezone(timezone)

        redis_host = kwargs.get('redis_host')
        rate_limit_count = kwargs.get('rate_limit_count')
        if not (redis_host and rate_limit_count):
            self.rate_limiter = FakeRateLimiter()
            return

        throttle = [{'requests': rate_limit_count,
                     'seconds': kwargs.get('rate_limit_duration', 1)}]
        self.rate_limiter = RateLimiter(
            conditions=throttle,
            redis_host=redis_host,
            redis_port=kwargs.get('redis_port', 6379),
            redis_db=kwargs.get('redis_db', 0),
            redis_password=kwargs.get('redis_password'),
            redis_namespace=kwargs.get('redis_namespace', 'jormungandr.rate_limiter'))
Exemplo n.º 4
0
    def __init__(self, id, service_url, timezone, object_id_tag=None,
                 destination_id_tag=None, instance=None, timeout=10,
                 redis_host=None, redis_db=0, redis_port=6379,
                 redis_password=None, max_requests_by_second=15,
                 redis_namespace='jormungandr.rate_limiter', **kwargs):
        """
        Store the connector settings and build its circuit breaker and rate limiter.

        The circuit breaker values are read, in that order of priority, from the
        'circuit_breaker_max_fail' / 'circuit_breaker_reset_timeout' keyword
        arguments, from the application configuration, or default to 5 and 60.

        When redis_host is falsy a FakeRateLimiter is used, otherwise a RateLimiter
        is built with a single one-second condition of max_requests_by_second requests.
        """
        self.rt_system_id = id
        self.service_url = service_url
        self.timeout = timeout  # expressed in seconds
        self.object_id_tag = object_id_tag or id
        self.destination_id_tag = destination_id_tag
        self.instance = instance

        config_fail_max = app.config.get(
            str('CIRCUIT_BREAKER_MAX_SYNTHESE_FAIL'), 5)
        fail_max = kwargs.get('circuit_breaker_max_fail', config_fail_max)
        config_reset_timeout = app.config.get(
            str('CIRCUIT_BREAKER_SYNTHESE_TIMEOUT_S'), 60)
        reset_timeout = kwargs.get('circuit_breaker_reset_timeout',
                                   config_reset_timeout)
        self.breaker = pybreaker.CircuitBreaker(
            fail_max=fail_max, reset_timeout=reset_timeout)

        self.timezone = pytz.timezone(timezone)

        if redis_host:
            throttle = [{'requests': max_requests_by_second, 'seconds': 1}]
            self.rate_limiter = RateLimiter(
                conditions=throttle,
                redis_host=redis_host,
                redis_port=redis_port,
                redis_db=redis_db,
                redis_password=redis_password,
                redis_namespace=redis_namespace,
            )
        else:
            # no redis configured: fall back on the fake limiter
            self.rate_limiter = FakeRateLimiter()
Exemplo n.º 5
0
    def __init__(
        self,
        id,
        service_url,
        service_args,
        timezone,
        object_id_tag=None,
        destination_id_tag=None,
        instance=None,
        timeout=10,
        **kwargs
    ):
        """
        Store the connector settings and build its circuit breaker and rate limiter.

        Optional keyword arguments read from kwargs:
         * source_stop_code (default "StopTimeoCode"), source_line_code
           (default "LineTimeoCode"), next_stop_time_number (default 5)
         * circuit_breaker_max_fail / circuit_breaker_reset_timeout: override the
           application configuration values (which default to 5 and 60)
         * from_datetime_step: overrides the 'TIMEOUT_TIMEO' cache configuration
           value (60 when missing)
         * redis_host, rate_limit_count, rate_limit_duration, redis_port, redis_db,
           redis_password, redis_namespace: rate limiter settings; a redis-backed
           RateLimiter is only created when redis_host and rate_limit_count are truthy
        """
        self.rt_system_id = id
        self.service_url = service_url
        self.service_args = service_args
        self.timeout = timeout  # expressed in seconds
        self.object_id_tag = object_id_tag or id
        self.destination_id_tag = destination_id_tag or "source"
        self.instance = instance

        self.timeo_stop_code = kwargs.get("source_stop_code", "StopTimeoCode")
        self.timeo_line_code = kwargs.get("source_line_code", "LineTimeoCode")
        self.next_stop_time_number = kwargs.get("next_stop_time_number", 5)

        config_fail_max = app.config.get(str('CIRCUIT_BREAKER_MAX_TIMEO_FAIL'), 5)
        fail_max = kwargs.get('circuit_breaker_max_fail', config_fail_max)
        config_reset_timeout = app.config.get(str('CIRCUIT_BREAKER_TIMEO_TIMEOUT_S'), 60)
        reset_timeout = kwargs.get('circuit_breaker_reset_timeout', config_reset_timeout)
        self.breaker = pybreaker.CircuitBreaker(fail_max=fail_max, reset_timeout=reset_timeout)

        # A step is applied on from_datetime to discretize calls and allow caching them
        config_step = app.config.get(str('CACHE_CONFIGURATION'), {}).get(str('TIMEOUT_TIMEO'), 60)
        self.from_datetime_step = kwargs.get('from_datetime_step', config_step)

        # Note: if the timezone is not known, pytz raises an error
        self.timezone = pytz.timezone(timezone)

        redis_host = kwargs.get('redis_host')
        rate_limit_count = kwargs.get('rate_limit_count')
        if redis_host and rate_limit_count:
            throttle = [
                {'requests': rate_limit_count, 'seconds': kwargs.get('rate_limit_duration', 1)}
            ]
            self.rate_limiter = RateLimiter(
                conditions=throttle,
                redis_host=redis_host,
                redis_port=kwargs.get('redis_port', 6379),
                redis_db=kwargs.get('redis_db', 0),
                redis_password=kwargs.get('redis_password'),
                redis_namespace=kwargs.get('redis_namespace', 'jormungandr.rate_limiter'),
            )
        else:
            self.rate_limiter = FakeRateLimiter()

        # We consider that all errors, greater than or equal to 100, are blocking
        self.INTERNAL_TIMEO_ERROR_CODE_LIMIT = 100
Exemplo n.º 6
0
class Synthese(RealtimeProxy):
    """
    Class managing calls to the Synthese external service providing real-time next passages.


    curl example to check/test that external service is working:
    curl -X GET '{server}?SERVICE=tdg&roid={stop_code}&rn={nb_desired}&date={datetime}'

    {nb_desired} and {datetime} can be empty
    {datetime} uses the format '%Y-%m-%d %H:%M', url-encoded (' ' > '%20')

    On the response, Navitia matches route-point's {route_codes} (see details in _find_route_point_passages()).
    {route_codes} and {stop_code} are fetched using the same code key, named after
    the 'object_id_tag' if provided on connector's init, or the 'id' otherwise.

    In practice it will look like:
    curl -X GET 'http://bobito.fr/?SERVICE=tdg&roid=68435211116990230&rn=5&date=2018-06-11%2011:13'
    """
    def __init__(self,
                 id,
                 service_url,
                 timezone,
                 object_id_tag=None,
                 destination_id_tag=None,
                 instance=None,
                 timeout=10,
                 redis_host=None,
                 redis_db=0,
                 redis_port=6379,
                 redis_password=None,
                 max_requests_by_second=15,
                 redis_namespace='jormungandr.rate_limiter',
                 **kwargs):
        """
        Store the connector settings and build its circuit breaker and rate limiter.

        :param id: identifier of the realtime system; also used as object_id_tag
            when no object_id_tag is given
        :param service_url: base url of the Synthese service
        :param timezone: timezone name, handed to pytz.timezone
        :param object_id_tag: tag used to fetch stop, route and line codes
        :param destination_id_tag: stored on the instance (not read by this class)
        :param instance: object whose ``ptref`` is queried in _find_route_point_passages
        :param timeout: timeout of the http call, in seconds
        :param redis_host: when falsy a FakeRateLimiter is used, otherwise a
            RateLimiter is built from the redis_* parameters
        :param max_requests_by_second: 'requests' value of the one-second
            condition given to the RateLimiter
        :param kwargs: accepted and ignored
        """
        self.service_url = service_url
        self.timeout = timeout  # timeout in seconds
        self.rt_system_id = id
        self.object_id_tag = object_id_tag if object_id_tag else id
        self.destination_id_tag = destination_id_tag
        self.instance = instance
        self.breaker = pybreaker.CircuitBreaker(
            fail_max=app.config['CIRCUIT_BREAKER_MAX_SYNTHESE_FAIL'],
            reset_timeout=app.config['CIRCUIT_BREAKER_SYNTHESE_TIMEOUT_S'])
        self.timezone = pytz.timezone(timezone)
        if not redis_host:
            self.rate_limiter = FakeRateLimiter()
        else:
            self.rate_limiter = RateLimiter(
                conditions=[{
                    'requests': max_requests_by_second,
                    'seconds': 1
                }],
                redis_host=redis_host,
                redis_port=redis_port,
                redis_db=redis_db,
                redis_password=redis_password,
                redis_namespace=redis_namespace)

    def __repr__(self):
        """
        used as the cache key. we use the rt_system_id to share the cache between servers in production
        """
        try:
            return self.rt_system_id.encode('utf-8', 'backslashreplace')
        except Exception:
            # the id cannot be encoded (e.g. it is not a string): use it as is
            return self.rt_system_id

    @cache.memoize(app.config['CACHE_CONFIGURATION'].get(
        'TIMEOUT_SYNTHESE', 30))
    def _call_synthese(self, url):
        """
        http call to synthese

        The call goes through the rate limiter, then through the circuit breaker.

        :param url: full url to request
        :return: the response of requests.get
        :raise RealtimeProxyError: when the rate limit is reached, the circuit
            breaker is open, the call times out, redis is unreachable or any
            other error occurs
        """
        try:
            if not self.rate_limiter.acquire(self.rt_system_id, block=False):
                raise RealtimeProxyError('maximum rate reached')
            return self.breaker.call(requests.get, url, timeout=self.timeout)
        except pybreaker.CircuitBreakerError as e:
            logging.getLogger(__name__).error(
                'Synthese RT service dead, using base '
                'schedule (error: {})'.format(e))
            raise RealtimeProxyError('circuit breaker open')
        except requests.Timeout as t:
            logging.getLogger(__name__).error(
                'Synthese RT service timeout, using base '
                'schedule (error: {})'.format(t))
            raise RealtimeProxyError('timeout')
        except redis.ConnectionError:
            logging.getLogger(__name__).exception(
                'there is an error with Redis')
            raise RealtimeProxyError('redis error')
        except Exception as e:
            logging.getLogger(__name__).exception(
                'Synthese RT error, using base schedule')
            raise RealtimeProxyError(str(e))

    def _get_next_passage_for_route_point(self,
                                          route_point,
                                          count=None,
                                          from_dt=None,
                                          current_dt=None,
                                          duration=None):
        """
        Query synthese for the stop of the route point and return the matching passages.

        :param route_point: route point for which the next passages are wanted
        :param count: optional number of passages asked to synthese
        :param from_dt: optional timestamp from which passages are asked
        :param current_dt: not used by this implementation
        :param duration: not used by this implementation
        :return: the passages found by _find_route_point_passages, or None when
            no url can be built or the call returns a falsy response
        :raise RealtimeProxyError: when the call fails or the response status is not 200
        """
        url = self._make_url(route_point, count, from_dt)
        if not url:
            return None
        logging.getLogger(__name__).debug(
            'Synthese RT service , call url : {}'.format(url))
        r = self._call_synthese(url)
        if not r:
            return None

        if r.status_code != 200:
            # TODO better error handling, the response might be in 200 but in error
            logging.getLogger(__name__).error(
                'Synthese RT service unavailable, impossible to query : {}'.
                format(r.url))
            raise RealtimeProxyError('non 200 response')

        logging.getLogger(__name__).debug("synthese response: {}".format(
            r.text))
        passages = self._get_synthese_passages(r.content)

        return self._find_route_point_passages(route_point, passages)

    def _make_url(self, route_point, count=None, from_dt=None):
        """
        The url returns something like a departure on a stop point

        :return: the url to call, or None when the stop has no code for object_id_tag
        """

        stop_id = route_point.fetch_stop_id(self.object_id_tag)

        if not stop_id:
            # the stop id is missing, we'll not find any realtime
            logging.getLogger(__name__).debug(
                'missing realtime id for {obj}: stop code={s}'.format(
                    obj=route_point, s=stop_id))
            self.record_internal_failure('missing id')
            return None

        count_param = '&rn={c}'.format(c=count) if count else ''

        # if a custom datetime is provided we give it to synthese
        dt_param = '&date={dt}'.format(dt=self._timestamp_to_date(
            from_dt).strftime('%Y-%m-%d %H:%M')) if from_dt else ''

        url = "{base_url}?SERVICE=tdg&roid={stop_id}{count}{date}".format(
            base_url=self.service_url,
            stop_id=stop_id,
            count=count_param,
            date=dt_param)

        return url

    def _get_value(self, item, xpath, val):
        """
        Return attribute `val` of the first sub-element of `item` matching `xpath`,
        or None when no sub-element matches.
        """
        value = item.find(xpath)
        if value is None:
            logging.getLogger(__name__).debug(
                "Path not found: {path}".format(path=xpath))
            return None
        return value.get(val)

    def _get_real_time_passage(self, xml_journey):
        """
        :return RealTimePassage: object real time passage
        :param xml_journey: journey information
        exceptions :
            ValueError: Unable to parse datetime, day is out of range for month (for example)
        """
        dt = DateTimeFormat()(xml_journey.get('dateTime'))
        # the parsed datetime is localized in the connector's timezone, then converted to UTC
        utc_dt = self.timezone.normalize(
            self.timezone.localize(dt)).astimezone(pytz.utc)
        passage = RealTimePassage(utc_dt)
        passage.is_real_time = (xml_journey.get('realTime') == 'yes')
        return passage

    @staticmethod
    def _build(xml):
        """
        Parse `xml` and yield each 'journey' element found under the root.

        As this is a generator, the parsing (and a possible et.ParseError) only
        happens when the iteration starts.
        """
        try:
            root = et.fromstring(xml)
        except et.ParseError as e:
            logging.getLogger(__name__).error("invalid xml: {}".format(e))
            raise
        for xml_journey in root.findall('journey'):
            yield xml_journey

    def _get_synthese_passages(self, xml):
        """
        Group the passages of a synthese response by SyntheseRoutePoint
        (route id, stop id, line id).

        :return: dict SyntheseRoutePoint -> list of RealTimePassage, in document order
        """
        result = {}
        for xml_journey in self._build(xml):
            route_point = SyntheseRoutePoint(
                xml_journey.get('routeId'),
                self._get_value(xml_journey, 'stop', 'id'),
                self._get_value(xml_journey, 'line', 'id'))
            if route_point not in result:
                result[route_point] = []
            passage = self._get_real_time_passage(xml_journey)
            result[route_point].append(passage)
        return result

    def status(self):
        """
        Return a dict with the id, the timeout and the circuit breaker state of the connector.
        """
        return {
            'id': unicode(self.rt_system_id),
            'timeout': self.timeout,
            'circuit_breaker': {
                'current_state': self.breaker.current_state,
                'fail_counter': self.breaker.fail_counter,
                'reset_timeout': self.breaker.reset_timeout
            },
        }

    def _timestamp_to_date(self, timestamp):
        """
        Convert a UTC timestamp to an aware datetime in the connector's timezone.
        """
        dt = datetime.utcfromtimestamp(timestamp)
        dt = pytz.utc.localize(dt)
        return dt.astimezone(self.timezone)

    def _find_route_point_passages(self, route_point, passages):
        """
        To find the right passage in synthese:

        As a reminder we query synthese only for a stoppoint and we get, for all the routes that pass by
        this stop, the next passages.
        The tricky part is to find which route concerns our routepoint

         * we first look if by miracle we can find some routes with the synthese code of our route in its
         external codes (it can have several if the route is a fusion of many routes)
            -> if we found the routes (we can have more than one), we concatenate their passages
         * else we query navitia to get all routes that pass by the stoppoint for the line of the route point
            * if we get only one route, we search for this route's line in the synthese response
                (because lines synthese code are more coherent)
                -> we concatenate all synthese passages on this line
         -> else we return None (no realtime passage found)

        :param route_point: route point for which passages are searched
        :param passages: dict SyntheseRoutePoint -> list of passages
        :return: list of passages sorted by datetime, or None
        """
        log = logging.getLogger(__name__)
        stop_point_id = str(route_point.fetch_stop_id(self.object_id_tag))
        route_passages = [
            p for syn_rp, p in passages.items()
            if syn_rp.syn_route_id in route_point.fetch_all_route_id(
                self.object_id_tag)
            and stop_point_id == syn_rp.syn_stop_point_id
        ]

        if route_passages:
            return sorted(itertools.chain.from_iterable(route_passages),
                          key=lambda p: p.datetime)

        log.debug('impossible to find the route in synthese response, '
                  'looking for the line {}'.format(
                      route_point.fetch_line_uri()))

        routes_gen = self.instance.ptref.get_objs(
            type_pb2.ROUTE,
            'stop_point.uri = {stop} and line.uri = {line}'.format(
                stop=route_point.pb_stop_point.uri,
                line=route_point.fetch_line_uri()))

        # two routes at most are fetched: enough to know whether the route is the only one
        first_routes = list(itertools.islice(routes_gen, 2))

        if len(first_routes) == 1:
            # there is only one route that pass through our stoppoint for the line of the routepoint
            # we can concatenate all synthese's route of this line
            line_passages = [
                p for syn_rp, p in passages.items() if syn_rp.syn_line_id ==
                route_point.fetch_line_id(self.object_id_tag)
            ]

            if line_passages:
                return sorted(itertools.chain.from_iterable(line_passages),
                              key=lambda p: p.datetime)

            log.debug(
                'stoppoint {sp} has {nb_r} routes for line {l} ({l_codes}) in navitia and {nb_syn_r} '
                'in synthese (lines: {syn_lines})'.format(
                    sp=route_point.pb_stop_point.uri,
                    nb_r=len(first_routes),
                    l=route_point.fetch_line_uri(),
                    l_codes=route_point.fetch_line_id(self.object_id_tag),
                    nb_syn_r=len(passages),
                    syn_lines=[syn_rp.syn_line_id for syn_rp in passages]))

        if passages:
            log.info(
                'impossible to find a valid passage for {} (passage = {})'.
                format(route_point, passages))

        return None
Exemplo n.º 7
0
class Synthese(RealtimeProxy):
    """
    Class managing calls to the Synthese external service providing real-time next passages.
    """
    def __init__(self,
                 id,
                 service_url,
                 timezone,
                 object_id_tag=None,
                 timeout=10,
                 redis_host=None,
                 redis_db=0,
                 redis_port=6379,
                 redis_password=None,
                 max_requests_by_second=15,
                 redis_namespace='jormungandr.rate_limiter'):
        """
        Store the connector settings and build its circuit breaker and rate limiter.

        :param id: identifier of the realtime system; also used as object_id_tag
            when no object_id_tag is given
        :param service_url: base url of the Synthese service
        :param timezone: timezone name, handed to pytz.timezone
        :param object_id_tag: tag used to fetch stop and route codes
        :param timeout: timeout of the http call, in seconds
        :param redis_host: when falsy a FakeRateLimiter is used, otherwise a
            RateLimiter is built from the redis_* parameters
        :param max_requests_by_second: 'requests' value of the one-second
            condition given to the RateLimiter
        """
        self.service_url = service_url
        self.timeout = timeout  # timeout in seconds
        self.rt_system_id = id
        self.object_id_tag = object_id_tag if object_id_tag else id
        self.breaker = pybreaker.CircuitBreaker(
            fail_max=app.config['CIRCUIT_BREAKER_MAX_SYNTHESE_FAIL'],
            reset_timeout=app.config['CIRCUIT_BREAKER_SYNTHESE_TIMEOUT_S'])
        self.timezone = pytz.timezone(timezone)
        if not redis_host:
            self.rate_limiter = FakeRateLimiter()
        else:
            self.rate_limiter = RateLimiter(
                conditions=[{
                    'requests': max_requests_by_second,
                    'seconds': 1
                }],
                redis_host=redis_host,
                redis_port=redis_port,
                redis_db=redis_db,
                redis_password=redis_password,
                redis_namespace=redis_namespace)

    def __repr__(self):
        """
        used as the cache key. we use the rt_system_id to share the cache between servers in production
        """
        return self.rt_system_id

    @cache.memoize(app.config['CACHE_CONFIGURATION'].get(
        'TIMEOUT_SYNTHESE', 30))
    def _call_synthese(self, url):
        """
        http call to synthese

        The call goes through the rate limiter, then through the circuit breaker.
        Errors are logged and never propagated.

        :param url: full url to request
        :return: the response of requests.get, or None when the rate limit is
            reached or when any error occurs
        """
        try:
            if not self.rate_limiter.acquire(self.rt_system_id, block=False):
                return None  # this should not be cached :(
            return self.breaker.call(requests.get, url, timeout=self.timeout)
        except pybreaker.CircuitBreakerError as e:
            logging.getLogger(__name__).error(
                'Synthese RT service dead, using base '
                'schedule (error: {})'.format(e))
        except requests.Timeout as t:
            logging.getLogger(__name__).error(
                'Synthese RT service timeout, using base '
                'schedule (error: {})'.format(t))
        except redis.ConnectionError:
            logging.getLogger(__name__).exception(
                'there is an error with Redis')
        except Exception:
            # best effort: any other error is logged and None is returned, but
            # SystemExit and KeyboardInterrupt are no longer swallowed
            logging.getLogger(__name__).exception(
                'Synthese RT error, using base schedule')
        return None

    def next_passage_for_route_point(self, route_point):
        """
        Query synthese for the stop of the route point and return its passages.

        :param route_point: route point for which the next passages are wanted
        :return: the list of passages matching the route and stop codes of the
            route point, or None when nothing usable is found
        """
        url = self._make_url(route_point)
        if not url:
            return None

        r = self._call_synthese(url)
        if not r:
            return None

        if r.status_code != 200:
            # TODO better error handling, the response might be in 200 but in error
            logging.getLogger(__name__).error(
                'Synthese RT service unavailable, impossible to query : {}'.
                format(r.url))
            return None

        logging.getLogger(__name__).debug("synthese response: {}".format(
            r.text))
        stop_point_id = str(route_point.fetch_stop_id(self.object_id_tag))
        route_id = str(route_point.fetch_route_id(self.object_id_tag))
        synthese_key = SyntheseRoutePoint(route_id, stop_point_id)
        passages = self._get_synthese_passages(r.content)
        # if there is nothing from synthese, we keep the base
        return passages.get(synthese_key)

    def _make_url(self, route_point):
        """
        The url returns something like a departure on a stop point

        :return: the url to call, or None when the stop has no code for object_id_tag
        """

        stop_id = route_point.fetch_stop_id(self.object_id_tag)

        if not stop_id:
            # the stop id is missing, we'll not find any realtime
            logging.getLogger(__name__).debug(
                'missing realtime id for {obj}: stop code={s}'.format(
                    obj=route_point, s=stop_id))
            return None

        url = "{base_url}?SERVICE=tdg&roid={stop_id}".format(
            base_url=self.service_url, stop_id=stop_id)

        return url

    def _get_value(self, item, xpath, val):
        """
        Return attribute `val` of the first sub-element of `item` matching `xpath`,
        or None when no sub-element matches.
        """
        value = item.find(xpath)
        if value is None:
            logging.getLogger(__name__).debug(
                "Path not found: {path}".format(path=xpath))
            return None
        return value.get(val)

    def _get_real_time_passage(self, xml_journey):
        """
        :return RealTimePassage: object real time passage
        :param xml_journey: journey information
        exceptions :
            ValueError: Unable to parse datetime, day is out of range for month (for example)
        """
        dt = date_time_format(xml_journey.get('dateTime'))
        # the parsed datetime is localized in the connector's timezone, then converted to UTC
        utc_dt = self.timezone.normalize(
            self.timezone.localize(dt)).astimezone(pytz.utc)
        passage = RealTimePassage(utc_dt)
        passage.is_real_time = (xml_journey.get('realTime') == 'yes')
        return passage

    @staticmethod
    def _build(xml):
        """
        Parse `xml` and yield each 'journey' element found under the root.

        As this is a generator, the parsing (and a possible et.ParseError) only
        happens when the iteration starts.
        """
        try:
            root = et.fromstring(xml)
        except et.ParseError as e:
            # format the exception itself: `e.message` does not exist on every
            # python version and would hide the parse error
            logging.getLogger(__name__).error("invalid xml: {}".format(e))
            raise
        for xml_journey in root.findall('journey'):
            yield xml_journey

    def _get_synthese_passages(self, xml):
        """
        Group the passages of a synthese response by SyntheseRoutePoint (route id, stop id).

        :return: dict SyntheseRoutePoint -> list of RealTimePassage, in document order
        """
        result = {}
        for xml_journey in self._build(xml):
            route_point = SyntheseRoutePoint(
                xml_journey.get('routeId'),
                self._get_value(xml_journey, 'stop', 'id'))
            if route_point not in result:
                result[route_point] = []
            passage = self._get_real_time_passage(xml_journey)
            result[route_point].append(passage)
        return result

    def status(self):
        """
        Return a dict with the id, the timeout and the circuit breaker state of the connector.
        """
        return {
            'id': self.rt_system_id,
            'timeout': self.timeout,
            'circuit_breaker': {
                'current_state': self.breaker.current_state,
                'fail_counter': self.breaker.fail_counter,
                'reset_timeout': self.breaker.reset_timeout
            },
        }