def handle(builder, real_time_update, trip_updates):
    """
    Receive a RealTimeUpdate with at least one TripUpdate filled with the data received
    by the connector.
    Each TripUpdate is associated with the base-schedule VehicleJourney, complete/merge realtime is done using builder
    Then persist in db, publish for Navitia
    Returns real_time_update and the log_dict
    """
    if not real_time_update:
        raise TypeError()

    vj_keys = [(tu.vj.navitia_trip_id, tu.vj.start_timestamp) for tu in trip_updates]
    stored_trip_updates = TripUpdate.find_by_dated_vjs(vj_keys)

    # index existing rows by (trip_id, start_timestamp); setdefault keeps the first
    # match, mirroring a linear first-match scan
    stored_by_key = {}
    for stored in stored_trip_updates:
        stored_by_key.setdefault((stored.vj.navitia_trip_id, stored.vj.start_timestamp), stored)

    for incoming_tu in trip_updates:
        # find if there is already a row in db
        previous = stored_by_key.get((incoming_tu.vj.navitia_trip_id, incoming_tu.vj.start_timestamp))
        # merge the base schedule, the current realtime, and the new realtime
        merged_tu = builder.merge_trip_updates(incoming_tu.vj.navitia_vj, previous, incoming_tu)
        # manage and adjust consistency if possible
        if merged_tu is not None and check_consistency(merged_tu):
            # we have to link the current_vj_update with the new real_time_update
            # this link is done quite late to avoid too soon persistence of trip_update by sqlalchemy
            merged_tu.real_time_updates.append(real_time_update)

    db_commit(real_time_update)

    feed = convert_to_gtfsrt(real_time_update.trip_updates, gtfs_realtime_pb2.FeedHeader.DIFFERENTIAL)
    serialized_feed = feed.SerializeToString()
    publish(serialized_feed, builder.contributor.id)

    feed_datetime = datetime.datetime.utcfromtimestamp(feed.header.timestamp)
    log_dict = {
        "contributor": builder.contributor.id,
        "timestamp": feed_datetime,
        "output_trip_update_count": len(feed.entity),
        "output_feed_size": sys.getsizeof(serialized_feed),
    }

    # After merging trip_updates information of connector realtime, navitia and kirin database, if there is no new
    # information destined to navitia, update real_time_update with status = 'KO' and a proper error message.
    if not real_time_update.trip_updates and real_time_update.status == "pending":
        msg = "No new information destined for navitia on {}".format(real_time_update.contributor_id)
        set_rtu_status_ko(real_time_update, msg, is_reprocess_same_data_allowed=False)
        logging.getLogger(__name__).warning(
            "RealTimeUpdate id={}: {}".format(real_time_update.id, msg), extra=log_dict
        )
        db_commit(real_time_update)

    return real_time_update, log_dict
def build_trip_updates(self, rt_update):
    """Parse the SIRI-ET-XML-TN feed stored in rt_update and return ([trip_update], log_dict).

    Returns an empty list when the feed has no timestamp or carries no usable information.
    """
    log_dict = {}
    try:
        feed = ElementTree.fromstring(rt_update.raw_data)
    except ParseError as parse_err:
        raise InvalidArguments("invalid XML: {}".format(parse_err))
    assert isinstance(feed, xml.etree.ElementTree.Element)

    input_timestamp = self._get_input_timestamp(feed)
    if input_timestamp is None:
        # without a timestamp the feed cannot be situated in time, so it is dropped
        self.log.info(
            "Missing 'ResponseTimestamp' in the SIRI-ET-XML-TN feed; the feed will not be processed"
        )
        return [], log_dict

    log_dict["input_timestamp"] = input_timestamp
    self.log.debug("Start processing siri-et-xml-tn: timestamp = {} ".format(input_timestamp))

    trip_update = self._build_trip_update(feed)
    if trip_update is None:
        msg = "No information for this siri-et-xml-tn with timestamp: {}".format(input_timestamp)
        set_rtu_status_ko(rt_update, msg, is_reprocess_same_data_allowed=False)
        self.log.warning(msg)
        return [], log_dict

    return [trip_update], log_dict
def build_trip_updates(self, rt_update):
    """
    parse the gtfs-rt protobuf stored in the rt_update object (in Kirin db) and return a list of trip updates

    The TripUpdates are not associated with the RealTimeUpdate at this point
    """
    log_dict = {}
    if not hasattr(rt_update, "proto"):
        # GOTCHA: should match error message from build_rt_update(), as it will override it.
        # It allows manage_db_error() to work as expected.
        # TODO: improve that, but things are quite intricate for error/log management here.
        raise InvalidArguments("invalid protobuf")

    proto = rt_update.proto
    feed_dt = timestamp_to_utc_naive_dt(proto.header.timestamp)
    log_dict["input_timestamp"] = feed_dt
    self.log.debug(
        "Start processing GTFS-rt: timestamp = {} ({})".format(proto.header.timestamp, feed_dt)
    )

    trip_updates = []
    for entity in proto.entity:
        if not entity.HasField("trip_update"):
            # TODO: log and count in NR
            continue
        try:
            built = self._build_one_trip_update(entity.trip_update, gtfsrt_data_time=feed_dt)
        except Exception as e:
            if is_only_warning_exception(e):
                # TODO: log and count in NR
                continue
            # TODO: log and count in NR?
            # we want to track unexpected errors as usual
            # (and interrupting the feed-processing is OK in that case)
            raise
        if built is not None:
            trip_updates.append(built)

    if not trip_updates:
        msg = "No information for this gtfs-rt with timestamp: {}".format(proto.header.timestamp)
        set_rtu_status_ko(rt_update, msg, is_reprocess_same_data_allowed=False)
        self.log.warning(msg)

    return trip_updates, log_dict
def build_trip_updates(self, rt_update):
    """
    parse the gtfs-rt protobuf stored in the rt_update object (in Kirin db) and return a list of trip updates

    The TripUpdates are not associated with the RealTimeUpdate at this point
    """
    log_dict = {}
    if not hasattr(rt_update, "proto"):
        # GOTCHA: should match error message from build_rt_update(), as it will override it.
        # It allows manage_db_error() to work as expected.
        # TODO: improve that, but things are quite intricate for error/log management here.
        raise InvalidArguments("invalid protobuf")

    proto = rt_update.proto
    input_data_time = datetime.datetime.utcfromtimestamp(proto.header.timestamp)
    log_dict.update({"input_timestamp": input_data_time})
    self.log.debug(
        "Start processing GTFS-rt: timestamp = {} ({})".format(proto.header.timestamp, input_data_time)
    )

    trip_updates = []
    for entity in proto.entity:
        # BUG FIX: protobuf messages define no __bool__, so `if not entity.trip_update`
        # was always False and never skipped anything; HasField() is the correct check
        # (and matches the sibling GTFS-RT builder).
        if not entity.HasField("trip_update"):
            continue
        tu = self._make_trip_updates(entity.trip_update, input_data_time=input_data_time)
        trip_updates.extend(tu)

    if not trip_updates:
        msg = "No information for this gtfs-rt with timestamp: {}".format(proto.header.timestamp)
        set_rtu_status_ko(rt_update, msg, is_reprocess_same_data_allowed=False)
        self.log.warning(msg)
        # NOTE: log_dict is deliberately kept (it was reset to {} before), so the
        # "input_timestamp" info is still logged for empty feeds, as in the sibling builder.

    return trip_updates, log_dict
def wrap_build(builder, input_raw):
    """
    Function wrapping the processing of realtime information of an external feed
    This manages errors/logger/newrelic

    :param builder: the KirinModelBuilder to be called
        (must inherit from abstract_builder.AbstractKirinModelBuilder)
    :param input_raw: the feed to process
    """
    contributor = builder.contributor
    started_at = datetime.datetime.utcnow()
    rt_update = None
    log_dict = {
        "contributor": contributor.id,
        "input_feed_size": sys.getsizeof(input_raw),
    }
    record_custom_parameter("contributor", contributor.id)
    status = "OK"

    try:
        # create a raw rt_update obj, save the raw_input into the db
        rt_update, build_log = builder.build_rt_update(input_raw)
        log_dict.update(build_log)

        # raw_input is interpreted
        trip_updates, parse_log = builder.build_trip_updates(rt_update)
        log_dict.update(parse_log)

        # finally confront to previously existing information (base_schedule, previous real-time)
        _, handle_log = handle(builder, rt_update, trip_updates)
        log_dict.update(handle_log)
    except Exception as e:
        status = "warning" if is_only_warning_exception(e) else "failure"
        reprocess_ok = is_reprocess_allowed(e)
        if rt_update is None:
            # rt_update is not built, make sure reprocess is allowed
            allow_reprocess_same_data(contributor.id)
        else:
            if isinstance(e, KirinException) and "error" in e.data:
                err_msg = e.data["error"]
            else:
                err_msg = str(e)
            set_rtu_status_ko(rt_update, err_msg, is_reprocess_same_data_allowed=reprocess_ok)
            db_commit(rt_update)
        log_dict.update({"exc_summary": str(e), "reason": str(e)})
        record_custom_parameter("reason", str(e))  # using __str__() here to have complete details
        # Re-raise all exceptions (as flask manages exceptions for output)
        # try/except mechanism must wrap calls to this wrap_build() function (auto. for flask)
        # See CONTRIBUTING.md for newrelic's error filtering
        raise
    finally:
        if rt_update is not None and rt_update.status == "pending":
            rt_update.status = "OK" if (status == "OK") else "KO"
            db_commit(rt_update)

        log_dict["duration"] = (datetime.datetime.utcnow() - started_at).total_seconds()
        record_call("kirin_feed_process_info", status, **log_dict)

        # pick the log level matching the processing outcome
        logger = logging.getLogger(__name__)
        emit = {"OK": logger.info, "warning": logger.warning}.get(status, logger.error)
        emit(status, extra=log_dict)
def wrap_build(builder, input_raw):
    """
    Function wrapping the processing of realtime information of an external feed
    This manages errors/logger/newrelic

    :param builder: the KirinModelBuilder to be called
    :param input_raw: the feed to process
    """
    contributor = builder.contributor
    start_datetime = datetime.utcnow()
    rt_update = None
    log_dict = {"contributor": contributor.id}
    status = "OK"

    try:
        # create a raw rt_update obj, save the raw_input into the db
        rt_update, rtu_log_dict = builder.build_rt_update(input_raw)
        log_dict.update(rtu_log_dict)

        # raw_input is interpreted
        trip_updates, tu_log_dict = builder.build_trip_updates(rt_update)
        log_dict.update(tu_log_dict)

        # finally confront to previously existing information (base_schedule, previous real-time)
        _, handler_log_dict = core.handle(rt_update, trip_updates, contributor.id, builder.is_new_complete)
        log_dict.update(handler_log_dict)
    except Exception as e:
        status = "failure"
        allow_reprocess = True
        if is_invalid_input_exception(e):
            status = "warning"  # Kirin did his job correctly if the input is invalid and rejected
            allow_reprocess = False  # reprocess is useless if input is invalid

        if rt_update is not None:
            # BUG FIX: exceptions have no `.message` attribute in Python 3 (and it is
            # deprecated in Python 2), so the previous fallback `e.message` raised
            # AttributeError for any non-KirinException failure; use the string form.
            error = e.data["error"] if (isinstance(e, KirinException) and "error" in e.data) else six.text_type(e)
            set_rtu_status_ko(rt_update, error, is_reprocess_same_data_allowed=allow_reprocess)
            model.db.session.add(rt_update)
            model.db.session.commit()
        else:
            # rt_update is not built, make sure reprocess is allowed
            allow_reprocess_same_data(contributor.id)

        # BUG FIX: store the string form (not the exception object) so log records and
        # the newrelic custom parameter are serializable, as the comment intends.
        log_dict.update({"exc_summary": six.text_type(e), "reason": six.text_type(e)})
        record_custom_parameter("reason", six.text_type(e))  # using __str__() here to have complete details
        raise  # filters later for APM (auto.)
    finally:
        log_dict.update({"duration": (datetime.utcnow() - start_datetime).total_seconds()})
        record_call(status, **log_dict)
        if status == "OK":
            logging.getLogger(__name__).info(status, extra=log_dict)
        elif status == "warning":
            logging.getLogger(__name__).warning(status, extra=log_dict)
        else:
            logging.getLogger(__name__).error(status, extra=log_dict)