def read(
    self,
    logger: AirbyteLogger,
    config: Mapping[str, Any],
    catalog: ConfiguredAirbyteCatalog,
    state: MutableMapping[str, Any] = None,
) -> Iterator[AirbyteMessage]:
    """Implements the Read operation from the Airbyte Specification. See https://docs.airbyte.io/architecture/airbyte-specification.

    Iterates over the streams listed in ``catalog`` in order and yields every
    message produced by ``self._read_stream`` for each of them.

    :param logger: logger used for progress and error reporting.
    :param config: connector configuration, forwarded to ``self.streams``.
    :param catalog: configured catalog whose ``streams`` are synced.
    :param state: previously saved connector state; ``None`` or an empty mapping
        is treated as "no state". The mapping passed in is deep-copied and never
        mutated by this method.
    :raises KeyError: if a configured stream name is not among the streams
        returned by ``self.streams(config)``.
    :raises Exception: any exception raised while reading a stream is logged
        and re-raised unchanged.
    """
    # Work on a private copy so the caller's mapping is left untouched.
    connector_state = copy.deepcopy(state or {})
    logger.info(f"Starting syncing {self.name}")
    # TODO assert all streams exist in the connector
    # get the streams once in case the connector needs to make any queries to generate them
    stream_instances = {s.name: s for s in self.streams(config)}
    for configured_stream in catalog.streams:
        stream_name = configured_stream.stream.name
        try:
            if stream_name not in stream_instances:
                # Same exception type as the plain dict lookup would raise, but with
                # a message that says what was requested and what is available.
                raise KeyError(
                    f"The requested stream {stream_name} was not found in the source. "
                    f"Available streams: {list(stream_instances)}"
                )
            yield from self._read_stream(
                logger=logger,
                stream_instance=stream_instances[stream_name],
                configured_stream=configured_stream,
                connector_state=connector_state,
            )
        except Exception:
            # Name the stream that failed (not the connector) so the log is actionable.
            logger.exception(f"Encountered an exception while reading stream {stream_name}")
            raise
    logger.info(f"Finished syncing {self.name}")
def _read_incremental(
    self,
    logger: AirbyteLogger,
    stream_instance: Stream,
    configured_stream: ConfiguredAirbyteStream,
    connector_state: MutableMapping[str, Any],
) -> Iterator[AirbyteMessage]:
    """Read a stream in incremental mode, interleaving record and state messages.

    For every slice returned by ``stream_instance.stream_slices`` the records
    are read, each one is yielded as an Airbyte record, and the stream state is
    advanced with ``stream_instance.get_updated_state``. A checkpoint message is
    yielded every ``state_checkpoint_interval`` records within a slice (when the
    interval is truthy) and once more after each slice completes.

    :param logger: logger used for progress reporting and passed to the checkpoint helper.
    :param stream_instance: the stream implementation to read from.
    :param configured_stream: catalog entry supplying the stream name and cursor field.
    :param connector_state: state for the whole connector, keyed by stream name;
        passed to ``self._checkpoint_state`` on every checkpoint.
    :return: iterator over the messages produced by ``self._as_airbyte_record``
        and ``self._checkpoint_state``.
    """
    stream_name = configured_stream.stream.name
    stream_state = connector_state.get(stream_name, {})
    if stream_state:
        # stream_state is already this stream's own state, so log it directly
        # rather than looking the stream name up inside it a second time.
        logger.info(f"Setting state of {stream_name} stream to {stream_state}")

    checkpoint_interval = stream_instance.state_checkpoint_interval
    slices = stream_instance.stream_slices(
        cursor_field=configured_stream.cursor_field, sync_mode=SyncMode.incremental, stream_state=stream_state
    )
    for stream_slice in slices:
        records = stream_instance.read_records(
            sync_mode=SyncMode.incremental,
            stream_slice=stream_slice,
            stream_state=stream_state,
            cursor_field=configured_stream.cursor_field or None,
        )
        # The counter restarts at 1 for every slice, so the checkpoint cadence is per slice.
        for record_counter, record_data in enumerate(records, start=1):
            yield self._as_airbyte_record(stream_name, record_data)
            stream_state = stream_instance.get_updated_state(stream_state, record_data)
            if checkpoint_interval and record_counter % checkpoint_interval == 0:
                yield self._checkpoint_state(stream_name, stream_state, connector_state, logger)

        # Always checkpoint at the end of a slice, whatever the interval.
        yield self._checkpoint_state(stream_name, stream_state, connector_state, logger)
def _read_stream(
    self,
    logger: AirbyteLogger,
    stream_instance: Stream,
    configured_stream: ConfiguredAirbyteStream,
    connector_state: MutableMapping[str, Any],
) -> Iterator[AirbyteMessage]:
    """Yield every message for one configured stream and log how many records were read.

    The incremental reader is used only when the configured sync mode is
    incremental and the stream reports ``supports_incremental``; in every other
    case the full-refresh reader is used.

    :param logger: logger used for the start and end-of-stream messages.
    :param stream_instance: the stream implementation to read from.
    :param configured_stream: catalog entry supplying the stream name and sync mode.
    :param connector_state: connector-wide state, forwarded to the incremental reader only.
    :return: iterator over the messages produced by the selected reader, unchanged.
    """
    incremental_requested = configured_stream.sync_mode == SyncMode.incremental
    if incremental_requested and stream_instance.supports_incremental:
        messages = self._read_incremental(logger, stream_instance, configured_stream, connector_state)
    else:
        messages = self._read_full_refresh(stream_instance, configured_stream)

    stream_name = configured_stream.stream.name
    logger.info(f"Syncing stream: {stream_name} ")

    records_read = 0
    for message in messages:
        # Only RECORD messages count toward the total; all messages are passed through.
        if message.type == MessageType.RECORD:
            records_read += 1
        yield message

    logger.info(f"Read {records_read} records from {stream_name} stream")