def _poll_feed(self, model_builder):
    contributor = (
        model.Contributor.query_existing()
        .filter_by(id=self.config.get("contributor"), connector_type=self.connector_type)
        .first()
    )
    try:
        response = self._retrieve_feed()
        response.raise_for_status()
        # We changed the way we use the ETag to check if a new feed has to be processed,
        # see the _retrieve_feed function.
        # The 304 status code means the feed did not change.
        if response.status_code == 304:
            new_relic.ignore_transaction()
            manage_db_no_new(contributor_id=contributor.id)
            return
        elif response.status_code == 200:
            # we store the ETag from the response, as it has changed
            self._update_redis_etag(response)
    except Exception as e:
        manage_db_error(
            data="",
            contributor_id=contributor.id,
            error="Http Error",
            is_reprocess_same_data_allowed=True,
        )
        logger = logging.LoggerAdapter(logging.getLogger(__name__), extra={"contributor": contributor.id})
        logger.debug(str(e))
        return

    wrap_build(model_builder(contributor), response.content)
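# A minimal sketch of the conditional-GET mechanics referenced in _poll_feed above.
# The real _retrieve_feed/_update_redis_etag bodies are not shown in this document,
# so the `redis_client` handle, the key format and the config fields used here are
# assumptions: the point is only that the stored ETag is replayed as If-None-Match,
# letting the server answer 304 when the feed is unchanged.
import requests


def _retrieve_feed(self):
    headers = {}
    etag = redis_client.get("etag|{}".format(self.config.get("contributor")))  # hypothetical key format
    if etag:
        headers["If-None-Match"] = etag
    return requests.get(self.config.get("feed_url"), headers=headers, timeout=self.config.get("timeout", 1))


def _update_redis_etag(self, response):
    etag = response.headers.get("ETag")
    if etag:
        redis_client.set("etag|{}".format(self.config.get("contributor")), etag)  # hypothetical key format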
def gtfs_poller(self, config): func_name = "gtfs_poller" contributor = ( model.Contributor.query_existing() .filter_by(id=config.get("contributor"), connector_type=ConnectorType.gtfs_rt.value) .first() ) logger = logging.LoggerAdapter(logging.getLogger(__name__), extra={"contributor": contributor.id}) lock_name = make_kirin_lock_name(func_name, contributor.id) with get_lock(logger, lock_name, app.config[str("REDIS_LOCK_TIMEOUT_POLLER")]) as locked: if not locked or not config.get("feed_url"): new_relic.ignore_transaction() return retrieval_interval = config.get("retrieval_interval", 10) if _is_last_call_too_recent(func_name, contributor.id, retrieval_interval): # do nothing if the last call is too recent new_relic.ignore_transaction() return logger.debug("polling of %s", config.get("feed_url")) # We do a HEAD request at the very beginning of polling and we compare it with the previous one to check if # the gtfs-rt is changed. # If the HEAD request or Redis get/set fail, we just ignore this part and do the polling anyway if not _is_newer(config): new_relic.ignore_transaction() manage_db_no_new(connector_type=ConnectorType.gtfs_rt.value, contributor_id=contributor.id) return try: response = _retrieve_gtfsrt(config) response.raise_for_status() except Exception as e: manage_db_error( data="", connector_type=ConnectorType.gtfs_rt.value, contributor_id=contributor.id, error="Http Error", is_reprocess_same_data_allowed=True, ) logger.debug(six.text_type(e)) return wrap_build(KirinModelBuilder(contributor), response.content) logger.info("%s for %s is finished", func_name, contributor.id)
def build_rt_update(self, input_raw):
    # create a raw gtfs-rt object, save the raw protobuf into the db
    proto = gtfs_realtime_pb2.FeedMessage()
    log_dict = {}
    try:
        proto.ParseFromString(input_raw)
    except DecodeError:
        # We save the non-decodable gtfs-rt feed
        rt_update = manage_db_error(
            input_raw.encode("string_escape", "ignore"),  # protect for PostgreSQL "Text" type
            ConnectorType.gtfs_rt.value,
            contributor_id=self.contributor.id,
            error="invalid protobuf",
            is_reprocess_same_data_allowed=False,
        )
        return rt_update, log_dict

    feed = six.binary_type(proto)  # temp: for the moment, we save the protobuf as text
    rt_update = make_rt_update(
        feed, connector_type=self.contributor.connector_type, contributor_id=self.contributor.id
    )
    rt_update.proto = proto
    return rt_update, log_dict
def gtfs_poller(self, config):
    func_name = 'gtfs_poller'
    logger = logging.LoggerAdapter(logging.getLogger(__name__), extra={'contributor': config['contributor']})
    logger.debug('polling of %s', config['feed_url'])

    contributor = config['contributor']
    lock_name = make_kirin_lock_name(func_name, contributor)
    with get_lock(logger, lock_name, app.config['REDIS_LOCK_TIMEOUT_POLLER']) as locked:
        if not locked:
            new_relic.ignore_transaction()
            return

        # We do a HEAD request at the very beginning of polling and compare it with the
        # previous one to check whether the gtfs-rt has changed.
        # If the HEAD request or the Redis get/set fails, we just skip this check and poll anyway.
        if not _is_newer(config):
            new_relic.ignore_transaction()
            manage_db_no_new(connector='gtfs-rt', contributor=contributor)
            return

        try:
            response = requests.get(config['feed_url'], timeout=config.get('timeout', 1))
            response.raise_for_status()
        except Exception as e:
            manage_db_error(data='', connector='gtfs-rt', contributor=contributor, status='KO', error='Http Error')
            logger.debug(str(e))
            return

        nav = navitia_wrapper.Navitia(
            url=config['navitia_url'],
            token=config['token'],
            timeout=5,
            cache=redis,
            query_timeout=app.config.get('NAVITIA_QUERY_CACHE_TIMEOUT', 600),
            pubdate_timeout=app.config.get('NAVITIA_PUBDATE_CACHE_TIMEOUT', 600),
        ).instance(config['coverage'])

        proto = gtfs_realtime_pb2.FeedMessage()
        try:
            proto.ParseFromString(response.content)
        except DecodeError:
            manage_db_error(proto, 'gtfs-rt', contributor=contributor, status='KO', error='Decode Error')
            logger.debug('invalid protobuf')
        else:
            model_maker.handle(proto, nav, contributor)

        logger.info('%s for %s is finished', func_name, contributor)
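# A minimal sketch of the _is_newer check described in the comment above: issue a
# HEAD request, compare Last-Modified/ETag with the value cached in Redis, and fail
# open (return True) if anything goes wrong, so a broken HEAD request or Redis only
# costs an extra poll. The key format is hypothetical; only the fail-open behaviour
# is documented by the pollers above.
def _is_newer(config):
    try:
        head = requests.head(config['feed_url'], timeout=config.get('timeout', 1))
        new_fingerprint = head.headers.get('Last-Modified') or head.headers.get('ETag')
        if new_fingerprint is None:
            return True  # nothing to compare with: assume the feed changed
        key = 'feed_fingerprint|{}'.format(config['contributor'])  # hypothetical key format
        old_fingerprint = redis.get(key)
        redis.set(key, new_fingerprint)
        if isinstance(old_fingerprint, bytes):  # redis clients may return bytes
            old_fingerprint = old_fingerprint.decode('utf-8')
        return old_fingerprint != new_fingerprint
    except Exception:
        # HEAD request or Redis get/set failed: ignore the check and poll anyway
        return True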
def post(self):
    raw_proto = _get_gtfs_rt(flask.globals.request)

    from kirin import gtfs_realtime_pb2

    # create a raw gtfs-rt object, save the raw protobuf into the db
    proto = gtfs_realtime_pb2.FeedMessage()
    try:
        proto.ParseFromString(raw_proto)
    except DecodeError:
        # We save the non-decodable gtfs-rt feed
        manage_db_error(proto, 'gtfs-rt', contributor=self.contributor, status='KO', error='Decode Error')
        raise InvalidArguments('invalid protobuf')
    else:
        model_maker.handle(proto, self.navitia_wrapper, self.contributor)
        return 'OK', 200
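# _get_gtfs_rt is only ever called above. A plausible reading, given that the
# handler itself raises InvalidArguments on a bad protobuf, is that it extracts the
# raw POST body and rejects empty requests; this body is an assumption, not kirin's
# actual helper.
def _get_gtfs_rt(req):
    if not req.data:
        raise InvalidArguments('no gtfs-rt data provided')
    return req.data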
def build_rt_update(self, input_raw):
    log_dict = {}
    try:
        ElementTree.fromstring(input_raw)
    except ParseError:
        # We save the non-decodable XML feed
        rt_update = manage_db_error(
            str(input_raw, encoding="utf-8", errors="replace"),  # protect for PostgreSQL "Text" type
            contributor_id=self.contributor.id,
            error="invalid xml",
            is_reprocess_same_data_allowed=False,
        )
        return rt_update, log_dict

    rt_update = make_rt_update(input_raw, contributor_id=self.contributor.id)
    return rt_update, log_dict
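# Quick illustration of the two paths above, assuming a hypothetical `builder`
# object carrying this build_rt_update and a live app/db context: non-well-formed
# bytes are stored decoded with errors="replace" (so PostgreSQL's Text column
# accepts them) instead of being dropped silently.
ok_update, _ = builder.build_rt_update(b"<feed/>")    # well-formed XML: stored as-is
bad_update, _ = builder.build_rt_update(b"<feed\xff")  # ParseError path: error="invalid xml"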
def build_rt_update(self, input_raw):
    # create a raw gtfs-rt object, save the raw protobuf into the db (as text)
    proto = gtfs_realtime_pb2.FeedMessage()
    log_dict = {}
    try:
        proto.ParseFromString(input_raw)
    except DecodeError:
        # We save the non-decodable gtfs-rt feed
        rt_update = manage_db_error(
            str(input_raw, encoding="utf-8", errors="replace"),  # protect for PostgreSQL "Text" type
            contributor_id=self.contributor.id,
            error="invalid protobuf",
            is_reprocess_same_data_allowed=False,
        )
        return rt_update, log_dict

    feed = str(proto)  # temp: for the moment, we save the protobuf as text
    rt_update = make_rt_update(feed, contributor_id=self.contributor.id)
    rt_update.proto = proto
    return rt_update, log_dict
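# Usage sketch for the protobuf variant, again with a hypothetical `builder` and a
# live app/db context. The feed construction uses the standard gtfs-realtime
# binding (a FeedMessage only needs its header's gtfs_realtime_version in order to
# serialize); any non-protobuf bytes take the DecodeError path.
valid = gtfs_realtime_pb2.FeedMessage()
valid.header.gtfs_realtime_version = "1.0"

rt_update, log_dict = builder.build_rt_update(valid.SerializeToString())  # parsed: rt_update.proto is set
bad_update, _ = builder.build_rt_update(b"\xff not protobuf")  # DecodeError path: error="invalid protobuf"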