def event_records(event_records_filter, **_kwargs):
    """Build placeholder event log records for the events selected by a filter.

    Scans ``asset_events`` (defined outside this function; each entry is indexed
    as ``entry[0]`` and ``entry[1]``) and keeps the entries whose first element
    equals the last path component of the filter's asset key. When the filter
    carries an ``after_cursor``, only entries whose second element is strictly
    greater than that cursor are kept.

    Args:
        event_records_filter: Object exposing ``asset_key`` (with a ``path``
            sequence) and ``after_cursor`` (``None`` to disable the cursor check).
        **_kwargs: Accepted and ignored.

    Returns:
        A list of ``EventLogRecord`` objects, one per kept entry, whose
        ``storage_id`` is the entry's second element and whose
        ``event_log_entry`` is ``None``.
    """
    key = event_records_filter.asset_key
    cursor = event_records_filter.after_cursor

    records = []
    for candidate in asset_events:
        # Skip entries recorded for a different asset.
        if key.path[-1] != candidate[0]:
            continue
        # Skip entries at or before the cursor, when a cursor was supplied.
        if cursor is not None and not candidate[1] > cursor:
            continue
        records.append(
            EventLogRecord(storage_id=candidate[1], event_log_entry=None))
    return records
def get_event_records(
    self,
    event_records_filter: Optional[EventRecordsFilter] = None,
    limit: Optional[int] = None,
    ascending: bool = False,
) -> Iterable[EventLogRecord]:
    """Fetch event records across runs from the run-sharded sqlite storage.

    Record ids in sqlite do not auto increment across runs, so an id-based
    cursor cannot be used to page through events of several runs. Instead, the
    runs updated after the cursor's ``run_updated_after`` value are listed and
    the same query is executed against each run's connection in turn.

    Asset materialization and asset observation queries are delegated to the
    parent implementation.

    Args:
        event_records_filter: Optional filter restricting the events returned.
        limit: Optional maximum number of records to return.
        ascending: Order by timestamp ascending when true, descending otherwise.
            The same flag orders the runs that are visited.

    Returns:
        A list of ``EventLogRecord`` objects, truncated to ``limit`` entries.
        Rows that cannot be parsed as JSON, or that do not deserialize to an
        ``EventLogEntry``, are logged as warnings and left out.
    """
    check.opt_inst_param(event_records_filter, "event_records_filter", EventRecordsFilter)
    check.opt_int_param(limit, "limit")
    check.bool_param(ascending, "ascending")

    # Asset materializations and observations get mirrored into the index shard, so the
    # parent implementation can answer them without run shard-aware cursor logic.
    if event_records_filter and event_records_filter.event_type in (
        DagsterEventType.ASSET_MATERIALIZATION,
        DagsterEventType.ASSET_OBSERVATION,
    ):
        return super(SqliteEventLogStorage, self).get_event_records(
            event_records_filter=event_records_filter,
            limit=limit,
            ascending=ascending,
        )

    # Resolve the cursor once; a missing filter behaves like a missing cursor.
    after_cursor = event_records_filter.after_cursor if event_records_filter else None
    is_run_aware = isinstance(after_cursor, RunShardedEventsCursor)

    query = db.select([SqlEventLogStorageTable.c.id, SqlEventLogStorageTable.c.event])

    asset_details = None
    if event_records_filter and event_records_filter.asset_key:
        asset_details = next(
            iter(self._get_assets_details([event_records_filter.asset_key])))

    if not is_run_aware:
        warnings.warn(
            " Called `get_event_records` on a run-sharded event log storage with a query that is "
            "not run aware (e.g. not using a RunShardedEventsCursor). This likely has poor "
            "performance characteristics. Consider adding a RunShardedEventsCursor to your query "
            "or switching your instance configuration to use a non-run sharded event log storage "
            "(e.g. \nPostgresEventLogStorage, ConsolidatedSqliteEventLogStorage) "
        )

    query = self._apply_filter_to_query(
        query=query,
        event_records_filter=event_records_filter,
        asset_details=asset_details,
        # run-sharded cursor filters don't really make sense
        apply_cursor_filters=False,
    )
    if limit:
        query = query.limit(limit)

    timestamp_column = SqlEventLogStorageTable.c.timestamp
    query = query.order_by(
        timestamp_column.asc() if ascending else timestamp_column.desc())

    # Workaround for the run-sharded sqlite to enable cross-run queries: list the runs whose
    # events may qualify, then open one run connection per run id at a time.
    run_updated_after = after_cursor.run_updated_after if is_run_aware else None
    run_records = self._instance.get_run_records(
        filters=RunsFilter(updated_after=run_updated_after),
        order_by="update_timestamp",
        ascending=ascending,
    )

    collected = []
    for run_record in run_records:
        with self.run_connection(run_record.pipeline_run.run_id) as conn:
            rows = conn.execute(query).fetchall()

        for storage_id, serialized in rows:
            try:
                entry = deserialize_json_to_dagster_namedtuple(serialized)
                if isinstance(entry, EventLogEntry):
                    collected.append(
                        EventLogRecord(storage_id=storage_id, event_log_entry=entry))
                    # Stop reading this run as soon as enough records were gathered.
                    if limit and len(collected) >= limit:
                        break
                else:
                    logging.warning(
                        "Could not resolve event record as EventLogEntry for id `{}`."
                        .format(storage_id))
            except seven.JSONDecodeError:
                logging.warning(
                    "Could not parse event record id `{}`.".format(storage_id))

        # The inner break only leaves the row loop; stop visiting further runs too.
        if limit and len(collected) >= limit:
            break

    return collected[:limit]