def _is_newer(config):
    """Tell whether the feed should be polled, based on its HTTP ETag.

    A HEAD request is sent to ``config["feed_url"]`` (timeout taken from
    ``config["timeout"]``, 1 by default) and the returned ``ETag`` header is
    compared with the value stored in Redis for ``config["contributor"]``.

    :param config: poller configuration; must hold ``contributor`` and
        ``feed_url``, may hold ``timeout``.
    :return: ``False`` only when the remote ETag equals the stored one.
        ``True`` when the server sends no ETag, when the ETag differs (the
        stored value is then replaced), or when any exception occurs.
    """
    contributor = config["contributor"]
    logger = logging.LoggerAdapter(logging.getLogger(__name__), extra={"contributor": contributor})
    try:
        head = requests.head(config["feed_url"], timeout=config.get("timeout", 1))
        new_etag = head.headers.get("ETag")
        if not new_etag:
            return True  # unable to get an ETag, we continue the polling

        etag_key = build_redis_etag_key(contributor)
        old_etag = redis_client.get(etag_key)
        # The Redis client may hand the stored value back as bytes while the
        # HTTP header is text; on Python 3 the two never compare equal, so
        # bring the stored value to text before comparing.
        if isinstance(old_etag, bytes) and not isinstance(new_etag, bytes):
            old_etag = old_etag.decode("utf-8", "replace")

        if new_etag == old_etag:
            logger.info("get the same ETag of %s, skipping the polling for %s", etag_key, contributor)
            return False

        redis_client.set(etag_key, new_etag)
    except Exception as e:
        # Best effort: a failing HEAD request or Redis access only costs an
        # extra poll, so it is logged and otherwise ignored.
        logger.debug(
            "exception occurred when checking the newer version of gtfs for %s: %s",
            contributor,
            six.text_type(e),
        )
    return True  # whatever the exception is, we don't want to break the polling
def test_sirietliteidfm_poller():
    """Poll a mocked SIRI-ET-Lite IDFM feed three times and check ETag handling.

    The mocked feed answers, in order: 200 with ``etag1``, 304 with ``etag1``,
    then 200 with ``etag2``. After each poll the test checks the number of
    stored RealTimeUpdate rows (1, 1, 2), that no TripUpdate/StopTimeUpdate
    exists, and the ETag kept in Redis.
    """
    responses_list = [
        {"content": b"{}", "status_code": 200, "headers": {"ETag": "etag1"}},
        {"status_code": 304, "headers": {"ETag": "etag1"}},
        {"content": b"{}", "status_code": 200, "headers": {"ETag": "etag2"}},
    ]
    config = {
        "contributor": SIRI_ET_LITE_IDFM_CONTRIBUTOR_ID,
        "navitia_url": app.config.get("NAVITIA_URL"),
        "token": "navitia_token",
        "coverage": "navitia_coverage",
        "feed_url": FEED_URL,
        # No retrieval interval to be able to poll multiple times successively in test
        "retrieval_interval": 0,
        "timeout": app.config.get("SIRI_ET_LITE_IDFM_RT_TIMEOUT", 1),
    }
    etag_key = build_redis_etag_key(config["contributor"])
    poller = Poller("test_siri_et_lite_idfm_poller", ConnectorType.siri_et_lite_idfm.value, config)

    # One entry per poll: (expected RealTimeUpdate count, expected ETag in Redis)
    expectations = (
        (1, b"etag1"),
        (1, b"etag1"),
        (2, b"etag2"),
    )

    with requests_mock.Mocker() as m:
        m.get(FEED_URL, response_list=responses_list)
        with app.app_context():
            for expected_rtu_count, expected_etag in expectations:
                poller.poll(kirin.poller_workers.siri_et_lite_idfm.KirinModelBuilder)
                assert len(RealTimeUpdate.query.all()) == expected_rtu_count
                assert len(TripUpdate.query.all()) == 0
                assert len(StopTimeUpdate.query.all()) == 0
                assert expected_etag == kirin.redis_client.get(etag_key)
def test_poller_with_error_response(input_gtfs_rt):
    """Poll a mocked GTFS-RT feed whose second answer is an HTTP 400.

    The mocked feed answers, in order: 200 with ``etag1``, 400 without
    content, then 200 without ETag. The test checks after each poll the row
    counts, the ETag kept in Redis (set, then absent twice) and the status
    of the most recent RealTimeUpdate (OK, KO with "Http Error", OK).
    """
    responses_list = [
        {"content": input_gtfs_rt, "status_code": 200, "headers": {"ETag": "etag1"}},
        {"status_code": 400},
        {"content": basic_gtfs_rt_data_last_stop_without_delays(), "status_code": 200},
    ]
    config = {
        "contributor": GTFS_CONTRIBUTOR_ID,
        "navitia_url": app.config.get("NAVITIA_URL"),
        "token": "navitia_token",
        "coverage": "navitia_coverage",
        "feed_url": FEED_URL,
        # No retrieval interval to be able to poll successively in test
        "retrieval_interval": 0,
        "timeout": app.config.get("GTFS_RT_TIMEOUT", 1),
    }
    etag_key = build_redis_etag_key(config["contributor"])
    poller = Poller("test_gtfs_poller", ConnectorType.gtfs_rt.value, config)

    def newest_rtu():
        # Most recently created RealTimeUpdate; queried afresh on every call.
        return RealTimeUpdate.query.order_by(desc(RealTimeUpdate.created_at)).first()

    def check_row_counts(expected_rtu_count):
        # Only the RealTimeUpdate count grows; trip data comes from the first feed.
        assert len(RealTimeUpdate.query.all()) == expected_rtu_count
        assert len(TripUpdate.query.all()) == 1
        assert len(StopTimeUpdate.query.all()) == 4

    with requests_mock.Mocker() as m:
        m.get(FEED_URL, response_list=responses_list)
        with app.app_context():
            # 1st poll: valid feed carrying an ETag
            poller.poll(kirin.poller_workers.gtfs_rt.KirinModelBuilder)
            check_row_counts(1)
            assert b"etag1" == kirin.redis_client.get(etag_key)
            assert newest_rtu().status == "OK"

            # 2nd poll: HTTP 400
            poller.poll(kirin.poller_workers.gtfs_rt.KirinModelBuilder)
            check_row_counts(2)
            assert kirin.redis_client.get(etag_key) is None
            assert newest_rtu().raw_data == ""
            assert newest_rtu().status == "KO"
            assert newest_rtu().error == "Http Error"

            # 3rd poll: valid feed without ETag
            poller.poll(kirin.poller_workers.gtfs_rt.KirinModelBuilder)
            check_row_counts(3)
            assert kirin.redis_client.get(etag_key) is None
            assert newest_rtu().status == "OK"
def _update_redis_etag(self, resp):
    """Mirror the ``ETag`` header of ``resp`` into Redis for this contributor.

    When the response carries an ETag it is stored under the contributor's
    key; otherwise the stored key is deleted. Any failure is logged at debug
    level and otherwise ignored.

    :param resp: HTTP response whose ``headers`` mapping is read.
    """
    try:
        fresh_etag = resp.headers.get("ETag")
        redis_key = build_redis_etag_key(self.config["contributor"])
        if fresh_etag:
            kirin.redis_client.set(redis_key, fresh_etag)
        else:
            kirin.redis_client.delete(redis_key)
    except Exception:
        contributor_logger = logging.LoggerAdapter(
            logging.getLogger(__name__),
            extra={"contributor": self.config["contributor"]},
        )
        contributor_logger.debug("Could not set a new etag on Redis")
def _retrieve_feed(self):
    """Fetch the contributor's feed, sending the stored ETag when there is one.

    The ETag stored in Redis (if any) is sent as an ``If-None-Match`` header.
    A failure to read Redis is logged at debug level and the request is sent
    without that header. The elapsed time, measured from the start of this
    method, is passed to ``record_input_retrieval``.

    :return: the response of the GET request on ``config["feed_url"]``.
    """
    started_at = datetime.utcnow()
    request_headers = {}

    # Adding a If-None-Match header to check if the feed has changed or not
    # https://developer.mozilla.org/fr/docs/Web/HTTP/Headers/If-None-Match
    redis_key = build_redis_etag_key(self.config["contributor"])
    try:
        stored_etag = kirin.redis_client.get(redis_key)
        if stored_etag:
            request_headers["If-None-Match"] = stored_etag
    except Exception:
        contributor_logger = logging.LoggerAdapter(
            logging.getLogger(__name__),
            extra={"contributor": self.config["contributor"]},
        )
        contributor_logger.debug("Could not retrieve an old etag on Redis")

    response = requests.get(
        self.config["feed_url"],
        timeout=self.config.get("timeout", 1),
        headers=request_headers,
    )

    elapsed = datetime.utcnow() - started_at
    record_input_retrieval(
        contributor=self.config["contributor"],
        duration_ms=elapsed.total_seconds() * 1000,
    )
    return response