示例#1
0
 def _read_full_refresh(
     self,
     logger: logging.Logger,
     stream_instance: Stream,
     configured_stream: ConfiguredAirbyteStream,
     internal_config: InternalConfig,
 ) -> Iterator[AirbyteMessage]:
     """Yield the stream's records in full-refresh mode, slice by slice.

     The slices, and then each individual slice, are logged at debug level.
     Every record is wrapped with ``self._as_airbyte_record`` before being
     yielded. The generator stops as soon as ``self._limit_reached`` returns
     a truthy value for the running total of yielded records.
     """
     stream_slices = stream_instance.stream_slices(
         sync_mode=SyncMode.full_refresh, cursor_field=configured_stream.cursor_field
     )
     logger.debug(f"Processing stream slices for {configured_stream.stream.name}", extra={"stream_slices": stream_slices})

     emitted = 0
     for stream_slice in stream_slices:
         logger.debug("Processing stream slice", extra={"slice": stream_slice})
         slice_records = stream_instance.read_records(
             sync_mode=SyncMode.full_refresh,
             stream_slice=stream_slice,
             cursor_field=configured_stream.cursor_field,
         )
         # Continue numbering where the previous slice stopped so that
         # `emitted` is always the total across all slices.
         for emitted, record in enumerate(slice_records, start=emitted + 1):
             yield self._as_airbyte_record(configured_stream.stream.name, record)
             if self._limit_reached(internal_config, emitted):
                 return
示例#2
0
def _read_incremental(stream_instance: Stream, stream_state: MutableMapping[str, Any]):
    """Read all records of the stream in incremental mode.

    Returns a ``(records, state)`` tuple: the list of every record read from
    every slice, and ``stream_instance.state`` as observed after reading.
    """
    collected = []
    for stream_slice in stream_instance.stream_slices(sync_mode=SyncMode.incremental, stream_state=stream_state):
        slice_records = stream_instance.read_records(
            sync_mode=SyncMode.incremental,
            stream_slice=stream_slice,
            stream_state=stream_state,
        )
        collected.extend(slice_records)
    return collected, stream_instance.state
示例#3
0
def read_full_refresh(stream_instance: Stream):
    """Return a single list with every record of the stream, read in full-refresh mode."""
    return [
        record
        for stream_slice in stream_instance.stream_slices(sync_mode=SyncMode.full_refresh)
        for record in stream_instance.read_records(stream_slice=stream_slice, sync_mode=SyncMode.full_refresh)
    ]
示例#4
0
文件: utils.py 项目: Mu-L/airbyte
def read_full_refresh(stream_instance: Stream):
    """Read the stream in full-refresh mode and return its records as a list.

    Each record is passed, together with the stream's JSON schema, to
    ``stream_instance.transformer.transform`` before it is collected. The
    return value of ``transform`` is ignored, so it presumably works in place.
    """
    json_schema = stream_instance.get_json_schema()
    output = []
    for stream_slice in stream_instance.stream_slices(sync_mode=SyncMode.full_refresh):
        for record in stream_instance.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice):
            stream_instance.transformer.transform(record, json_schema)
            output.append(record)
    return output
示例#5
0
文件: utils.py 项目: Mu-L/airbyte
def read_incremental(stream_instance: Stream, stream_state: MutableMapping[str, Any]):
    """Read the stream in incremental mode, tracking state record by record.

    If ``stream_state`` is non-empty and the stream exposes a ``state``
    attribute, the stream is seeded with that state first. After every record
    the state is recomputed with ``get_updated_state``.

    Returns a ``(records, stream_state)`` tuple with all records read and the
    last computed state.
    """
    if stream_state and "state" in dir(stream_instance):
        stream_instance.state = stream_state

    collected = []
    for stream_slice in stream_instance.stream_slices(sync_mode=SyncMode.incremental, stream_state=stream_state):
        slice_records = stream_instance.read_records(
            sync_mode=SyncMode.incremental,
            stream_slice=stream_slice,
            stream_state=stream_state,
        )
        for record in slice_records:
            collected.append(record)
            stream_state = stream_instance.get_updated_state(stream_state, record)
    return collected, stream_state
示例#6
0
 def _read_full_refresh(
     self, stream_instance: Stream, configured_stream: ConfiguredAirbyteStream, internal_config: InternalConfig
 ) -> Iterator[AirbyteMessage]:
     """Yield the stream's records in full-refresh mode until the limit is hit.

     Records from all slices are wrapped with ``self._as_airbyte_record``. The
     generator ends early once ``self._limit_reached`` returns a truthy value
     for the number of records yielded so far.
     """
     stream_slices = stream_instance.stream_slices(sync_mode=SyncMode.full_refresh, cursor_field=configured_stream.cursor_field)
     # Lazily chain the records of every slice; each slice is only read once
     # the previous one has been exhausted.
     all_records = (
         record
         for stream_slice in stream_slices
         for record in stream_instance.read_records(
             stream_slice=stream_slice, sync_mode=SyncMode.full_refresh, cursor_field=configured_stream.cursor_field
         )
     )
     for emitted, record in enumerate(all_records, start=1):
         yield self._as_airbyte_record(configured_stream.stream.name, record)
         if self._limit_reached(internal_config, emitted):
             return
示例#7
0
 def _read_full_refresh(
     self, stream_instance: Stream,
     configured_stream: ConfiguredAirbyteStream
 ) -> Iterator[AirbyteMessage]:
     """Yield every record of the stream, read slice by slice in full-refresh mode.

     Each record is wrapped with ``self._as_airbyte_record`` under the
     configured stream's name.
     """
     for stream_slice in stream_instance.stream_slices(
         sync_mode=SyncMode.full_refresh, cursor_field=configured_stream.cursor_field
     ):
         slice_records = stream_instance.read_records(
             sync_mode=SyncMode.full_refresh,
             stream_slice=stream_slice,
             cursor_field=configured_stream.cursor_field,
         )
         yield from (
             self._as_airbyte_record(configured_stream.stream.name, record)
             for record in slice_records
         )
示例#8
0
    def _read_stream(
        self,
        logger: AirbyteLogger,
        stream_instance: Stream,
        configured_stream: ConfiguredAirbyteStream,
        connector_state: MutableMapping[str, Any],
        internal_config: InternalConfig,
    ) -> Iterator[AirbyteMessage]:
        """Sync one stream and yield every message it produces.

        An ``HttpStream`` gets its page size overridden when the internal
        config defines one. The stream is read incrementally only if the
        configured sync mode is incremental AND the stream supports it;
        otherwise it is read in full-refresh mode. Messages of type RECORD
        are counted and the total is logged once the stream is exhausted.
        """
        if internal_config.page_size and isinstance(stream_instance, HttpStream):
            logger.info(f"Setting page size for {stream_instance.name} to {internal_config.page_size}")
            stream_instance.page_size = internal_config.page_size

        if configured_stream.sync_mode == SyncMode.incremental and stream_instance.supports_incremental:
            messages = self._read_incremental(logger, stream_instance, configured_stream, connector_state, internal_config)
        else:
            messages = self._read_full_refresh(stream_instance, configured_stream, internal_config)

        stream_name = configured_stream.stream.name
        record_counter = 0
        logger.info(f"Syncing stream: {stream_name} ")
        for message in messages:
            # Only RECORD messages count; everything else is passed through uncounted.
            if message.type == MessageType.RECORD:
                record_counter += 1
            yield message

        logger.info(f"Read {record_counter} records from {stream_name} stream")
示例#9
0
    def _read_incremental(
        self,
        logger: AirbyteLogger,
        stream_instance: Stream,
        configured_stream: ConfiguredAirbyteStream,
        connector_state: MutableMapping[str, Any],
        internal_config: InternalConfig,
    ) -> Iterator[AirbyteMessage]:
        """Read a stream incrementally, interleaving records with state checkpoints.

        The stream's starting state is looked up in ``connector_state`` by
        stream name (defaulting to an empty dict). For each slice, every record
        is yielded and the state is recomputed via ``get_updated_state``. A
        checkpoint (the result of ``self._checkpoint_state``) is yielded:

        * every ``state_checkpoint_interval`` records within a slice, when the
          stream defines a truthy interval, and
        * once after each slice, including the slice in which the record limit
          was reached.

        The generator returns as soon as ``self._limit_reached`` reports that
        the total number of records read across all slices hit the limit.
        """
        stream_name = configured_stream.stream.name
        stream_state = connector_state.get(stream_name, {})
        if stream_state:
            logger.info(
                f"Setting state of {stream_name} stream to {stream_state}")

        checkpoint_interval = stream_instance.state_checkpoint_interval
        slices = stream_instance.stream_slices(
            cursor_field=configured_stream.cursor_field,
            sync_mode=SyncMode.incremental,
            stream_state=stream_state)
        # Counts records across ALL slices; used only for the record limit.
        total_records_counter = 0
        for slice in slices:
            records = stream_instance.read_records(
                sync_mode=SyncMode.incremental,
                stream_slice=slice,
                stream_state=stream_state,
                # An empty/falsy cursor field is passed on as None.
                cursor_field=configured_stream.cursor_field or None,
            )
            # record_counter restarts at 1 for every slice, so interval
            # checkpoints are counted per slice, not globally.
            for record_counter, record_data in enumerate(records, start=1):
                yield self._as_airbyte_record(stream_name, record_data)
                stream_state = stream_instance.get_updated_state(
                    stream_state, record_data)
                if checkpoint_interval and record_counter % checkpoint_interval == 0:
                    yield self._checkpoint_state(stream_name, stream_state,
                                                 connector_state, logger)

                total_records_counter += 1
                # This functionality should ideally live outside of this method
                # but since state is managed inside this method, we keep track
                # of it here.
                if self._limit_reached(internal_config, total_records_counter):
                    # Leave the record loop (not the slice loop) so that the
                    # checkpoint below is still emitted before returning.
                    break

            # Always checkpoint at the end of a slice.
            yield self._checkpoint_state(stream_name, stream_state,
                                         connector_state, logger)
            # Re-check the limit here: the break above only exits the record loop.
            if self._limit_reached(internal_config, total_records_counter):
                return
示例#10
0
def test_wrapped_primary_key_various_argument(test_input, expected):
    """The primary key must always come back wrapped as a list of lists."""
    assert Stream._wrapped_primary_key(test_input) == expected
示例#11
0
    def _read_incremental(
        self,
        logger: AirbyteLogger,
        stream_instance: Stream,
        configured_stream: ConfiguredAirbyteStream,
        connector_state: MutableMapping[str, Any],
    ) -> Iterator[AirbyteMessage]:
        """Read a stream incrementally, interleaving records with state checkpoints.

        The stream's starting state is looked up in ``connector_state`` by
        stream name (defaulting to an empty dict). For each slice, every record
        is yielded and the state is recomputed via ``get_updated_state``. A
        checkpoint (the result of ``self._checkpoint_state``) is yielded every
        ``state_checkpoint_interval`` records within a slice, when the stream
        defines a truthy interval, and once more after each slice.
        """
        stream_name = configured_stream.stream.name
        stream_state = connector_state.get(stream_name, {})
        if stream_state:
            # `stream_state` is already this stream's entry of the connector
            # state, so log it directly; looking the stream name up again
            # inside it would normally log None.
            logger.info(f"Setting state of {stream_name} stream to {stream_state}")

        checkpoint_interval = stream_instance.state_checkpoint_interval
        slices = stream_instance.stream_slices(
            cursor_field=configured_stream.cursor_field,
            sync_mode=SyncMode.incremental,
            stream_state=stream_state,
        )
        for stream_slice in slices:
            records = stream_instance.read_records(
                sync_mode=SyncMode.incremental,
                stream_slice=stream_slice,
                stream_state=stream_state,
                # An empty/falsy cursor field is passed on as None.
                cursor_field=configured_stream.cursor_field or None,
            )
            # The counter restarts at 1 for every slice, so interval
            # checkpoints are counted per slice, not globally.
            for record_counter, record_data in enumerate(records, start=1):
                yield self._as_airbyte_record(stream_name, record_data)
                stream_state = stream_instance.get_updated_state(stream_state, record_data)
                if checkpoint_interval and record_counter % checkpoint_interval == 0:
                    yield self._checkpoint_state(stream_name, stream_state, connector_state, logger)

            # Always checkpoint at the end of a slice.
            yield self._checkpoint_state(stream_name, stream_state, connector_state, logger)
示例#12
0
    def _read_stream(
        self,
        logger: logging.Logger,
        stream_instance: Stream,
        configured_stream: ConfiguredAirbyteStream,
        connector_state: MutableMapping[str, Any],
        internal_config: InternalConfig,
    ) -> Iterator[AirbyteMessage]:
        """Sync one stream and yield every message it produces.

        Steps, in order: apply the log level to the stream's logger; override
        the page size of an ``HttpStream`` when the internal config defines
        one; log the sync settings at debug level; pick the incremental reader
        only if the configured sync mode is incremental AND the stream
        supports it (full refresh otherwise); then forward every message while
        counting those of type RECORD. The count is logged at the end.
        """
        self._apply_log_level_to_stream_logger(logger, stream_instance)

        if internal_config.page_size and isinstance(stream_instance, HttpStream):
            logger.info(f"Setting page size for {stream_instance.name} to {internal_config.page_size}")
            stream_instance.page_size = internal_config.page_size

        logger.debug(
            f"Syncing stream: {configured_stream.stream.name}",
            extra=dict(
                sync_mode=configured_stream.sync_mode,
                primary_key=configured_stream.primary_key,
                cursor_field=configured_stream.cursor_field,
            ),
        )

        if configured_stream.sync_mode == SyncMode.incremental and stream_instance.supports_incremental:
            messages = self._read_incremental(logger, stream_instance, configured_stream, connector_state, internal_config)
        else:
            messages = self._read_full_refresh(logger, stream_instance, configured_stream, internal_config)

        stream_name = configured_stream.stream.name
        record_counter = 0
        logger.info(f"Syncing stream: {stream_name} ")
        for message in messages:
            # Only RECORD messages count; everything else is passed through uncounted.
            if message.type == MessageType.RECORD:
                record_counter += 1
            yield message

        logger.info(f"Read {record_counter} records from {stream_name} stream")
示例#13
0
 def _read_incremental(
     self,
     logger: logging.Logger,
     stream_instance: Stream,
     configured_stream: ConfiguredAirbyteStream,
     connector_state: MutableMapping[str, Any],
     internal_config: InternalConfig,
 ) -> Iterator[AirbyteMessage]:
     """
     Run the parent incremental read, then emit one final checkpoint.

     The override exists so that the latest actual state is checkpointed:
     the stream state is refreshed after each batch of records has been read
     (if need_chunk is True), or after all records in the stream have been read.
     """
     parent_messages = super()._read_incremental(
         logger=logger,
         stream_instance=stream_instance,
         configured_stream=configured_stream,
         connector_state=connector_state,
         internal_config=internal_config,
     )
     yield from parent_messages

     # Ask the stream for its state with empty inputs and checkpoint the result.
     final_state = stream_instance.get_updated_state(current_stream_state={}, latest_record={})
     yield self._checkpoint_state(stream_instance, final_state, connector_state)
示例#14
0
def _configured_stream(stream: Stream, sync_mode: SyncMode):
    """Build a ConfiguredAirbyteStream for ``stream`` with the given sync mode.

    The destination sync mode is always ``DestinationSyncMode.overwrite``.
    """
    airbyte_stream = stream.as_airbyte_stream()
    return ConfiguredAirbyteStream(
        stream=airbyte_stream,
        sync_mode=sync_mode,
        destination_sync_mode=DestinationSyncMode.overwrite,
    )