Example #1
    def run(self, parsed_args: argparse.Namespace) -> Iterable[str]:
        cmd = parsed_args.command
        if not cmd:
            raise Exception("No command passed")

        if hasattr(parsed_args, "debug") and parsed_args.debug:
            self.logger.setLevel(logging.DEBUG)
            self.logger.debug("Debug logs enabled")
        else:
            self.logger.setLevel(logging.INFO)

        # TODO: add try/except for exceptions with different exit codes
        source_spec: ConnectorSpecification = self.source.spec(self.logger)
        with tempfile.TemporaryDirectory() as temp_dir:
            if cmd == "spec":
                message = AirbyteMessage(type=Type.SPEC, spec=source_spec)
                yield message.json(exclude_unset=True)
            else:
                raw_config = self.source.read_config(parsed_args.config)
                config = self.source.configure(raw_config, temp_dir)

                # Now that we have the config, we can use it to get a list of airbyte_secrets
                # that we should filter in logging to avoid leaking secrets
                config_secrets = get_secrets(
                    source_spec.connectionSpecification, config)
                update_secrets(config_secrets)

                # Remove internal flags from config before validating so
                # jsonschema's additionalProperties flag won't fail the validation
                connector_config, _ = split_config(config)
                if self.source.check_config_against_spec or cmd == "check":
                    check_config_against_spec_or_exit(connector_config,
                                                      source_spec)

                if cmd == "check":
                    check_result = self.source.check(self.logger, config)
                    if check_result.status == Status.SUCCEEDED:
                        self.logger.info("Check succeeded")
                    else:
                        self.logger.error("Check failed")

                    output_message = AirbyteMessage(
                        type=Type.CONNECTION_STATUS,
                        connectionStatus=check_result).json(exclude_unset=True)
                    yield output_message
                elif cmd == "discover":
                    catalog = self.source.discover(self.logger, config)
                    yield AirbyteMessage(
                        type=Type.CATALOG,
                        catalog=catalog).json(exclude_unset=True)
                elif cmd == "read":
                    config_catalog = self.source.read_catalog(
                        parsed_args.catalog)
                    state = self.source.read_state(parsed_args.state)
                    generator = self.source.read(self.logger, config,
                                                 config_catalog, state)
                    for message in generator:
                        yield message.json(exclude_unset=True)
                else:
                    raise Exception("Unexpected command " + cmd)
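
Below is a minimal sketch of how an entrypoint like this is typically wired into a connector's main module. launch is the airbyte_cdk helper that parses the CLI arguments, calls run(), and prints each emitted message; SourceExample is a hypothetical connector class standing in for a real source.

import sys

from airbyte_cdk.entrypoint import launch
from source_example import SourceExample  # hypothetical connector package

if __name__ == "__main__":
    # launch() wraps the source in an AirbyteEntrypoint, parses the
    # spec/check/discover/read arguments, and prints run()'s messages.
    launch(SourceExample(), sys.argv[1:])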
Example #2
    def read(
        self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None
    ) -> Iterator[AirbyteMessage]:
        """Implements the Read operation from the Airbyte Specification. See https://docs.airbyte.io/architecture/airbyte-specification."""
        connector_state = copy.deepcopy(state or {})
        logger.info(f"Starting syncing {self.name}")
        config, internal_config = split_config(config)
        # TODO assert all streams exist in the connector
        # get the streams once in case the connector needs to make any queries to generate them
        stream_instances = {s.name: s for s in self.streams(config)}
        self._stream_to_instance_map = stream_instances
        with create_timer(self.name) as timer:
            for configured_stream in catalog.streams:
                stream_instance = stream_instances.get(configured_stream.stream.name)
                if not stream_instance:
                    raise KeyError(
                        f"The requested stream {configured_stream.stream.name} was not found in the source. Available streams: {stream_instances.keys()}"
                    )

                try:
                    yield from self._read_stream(
                        logger=logger,
                        stream_instance=stream_instance,
                        configured_stream=configured_stream,
                        connector_state=connector_state,
                        internal_config=internal_config,
                    )
                except Exception as e:
                    logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
                    raise e
                finally:
                    logger.info(f"Finished syncing {self.name}")
                    logger.info(timer.report())

        logger.info(f"Finished syncing {self.name}")
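
For context, here is a hedged sketch of driving this read() directly, as a test harness might. MySource stands in for a concrete AbstractSource subclass, and the config and catalog file paths are illustrative.

import json

from airbyte_cdk.logger import AirbyteLogger
from airbyte_cdk.models import ConfiguredAirbyteCatalog, Type

source = MySource()  # hypothetical AbstractSource subclass
config = json.load(open("secrets/config.json"))
catalog = ConfiguredAirbyteCatalog.parse_obj(json.load(open("configured_catalog.json")))

# read() yields AirbyteMessage objects; RECORD messages carry the synced rows.
for message in source.read(AirbyteLogger(), config, catalog, state={}):
    if message.type == Type.RECORD:
        print(message.record.stream, message.record.data)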
Example #3
    def read(
        self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None
    ) -> Iterator[AirbyteMessage]:
        """
        Overwritten to dynamically receive only those streams that are necessary for reading for significant speed gains
        (Salesforce has a strict API limit on requests).
        """
        connector_state = copy.deepcopy(state or {})
        config, internal_config = split_config(config)
        # get the streams once in case the connector needs to make any queries to generate them
        logger.info("Starting generating streams")
        stream_instances = {s.name: s for s in self.streams(config, catalog=catalog)}
        logger.info(f"Starting syncing {self.name}")
        self._stream_to_instance_map = stream_instances
        for configured_stream in catalog.streams:
            stream_instance = stream_instances.get(configured_stream.stream.name)
            if not stream_instance:
                raise KeyError(
                    f"The requested stream {configured_stream.stream.name} was not found in the source. Available streams: {stream_instances.keys()}"
                )

            try:
                yield from self._read_stream(
                    logger=logger,
                    stream_instance=stream_instance,
                    configured_stream=configured_stream,
                    connector_state=connector_state,
                    internal_config=internal_config,
                )
            except Exception as e:
                logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
                raise e

        logger.info(f"Finished syncing {self.name}")
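
The speed gain comes from the catalog-aware streams() call logged as "Starting generating streams". A hedged sketch of what such an override might look like on the source class (generate_all_streams is a hypothetical helper, not the connector's actual API):

    def streams(self, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog = None) -> List[Stream]:
        all_streams = self.generate_all_streams(config)  # hypothetical: builds every stream
        if catalog is None:
            return all_streams
        # Keep only the streams the catalog requests, saving API quota.
        requested = {configured.stream.name for configured in catalog.streams}
        return [stream for stream in all_streams if stream.name in requested]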
Example #4
    def read(
        self,
        logger: logging.Logger,
        config: Mapping[str, Any],
        catalog: ConfiguredAirbyteCatalog,
        state: MutableMapping[str, Any] = None,
    ) -> Iterator[AirbyteMessage]:
        """
        This method is overridden to check whether the stream `quotes` exists in the source, if not skip reading that stream.
        """
        connector_state = copy.deepcopy(state or {})
        logger.info(f"Starting syncing {self.name}")
        config, internal_config = split_config(config)
        # TODO assert all streams exist in the connector
        # get the streams once in case the connector needs to make any queries to generate them
        stream_instances = {s.name: s for s in self.streams(config)}
        self._stream_to_instance_map = stream_instances
        with create_timer(self.name) as timer:
            for configured_stream in catalog.streams:
                stream_instance = stream_instances.get(
                    configured_stream.stream.name)
                if not stream_instance and configured_stream.stream.name == "quotes":
                    logger.warning(
                        "Stream `quotes` does not exist in the source. Skip reading `quotes` stream."
                    )
                    continue
                if not stream_instance:
                    raise KeyError(
                        f"The requested stream {configured_stream.stream.name} was not found in the source. Available streams: {stream_instances.keys()}"
                    )

                try:
                    yield from self._read_stream(
                        logger=logger,
                        stream_instance=stream_instance,
                        configured_stream=configured_stream,
                        connector_state=connector_state,
                        internal_config=internal_config,
                    )
                except Exception as e:
                    logger.exception(
                        f"Encountered an exception while reading stream {configured_stream.stream.name}"
                    )
                    display_message = stream_instance.get_error_display_message(e)
                    if display_message:
                        raise AirbyteTracedException.from_exception(e, message=display_message) from e
                    raise e
                finally:
                    logger.info(f"Finished syncing {self.name}")
                    logger.info(timer.report())

        logger.info(f"Finished syncing {self.name}")
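
What sets this variant apart is the AirbyteTracedException path: a stream can surface a user-friendly error by overriding get_error_display_message, which this read() converts into a traced exception for the platform to display. A hedged sketch with an illustrative stream class:

class IllustrativeStream(HttpStream):
    def get_error_display_message(self, exception: BaseException) -> Optional[str]:
        # Turn an opaque 403 into a message end users can act on.
        if isinstance(exception, requests.exceptions.HTTPError) and exception.response.status_code == 403:
            return "The connected account does not have permission to read this stream."
        return super().get_error_display_message(exception)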
Example #5
    def read(
        self,
        logger: AirbyteLogger,
        config: Mapping[str, Any],
        catalog: ConfiguredAirbyteCatalog,
        state: Optional[MutableMapping[str, Any]] = None,
    ) -> Iterator[AirbyteMessage]:
        """
        Overwritten to dynamically receive only those streams that are necessary for reading for significant speed gains
        (Salesforce has a strict API limit on requests).
        """
        connector_state = copy.deepcopy(state or {})
        config, internal_config = split_config(config)
        # get the streams once in case the connector needs to make any queries to generate them
        logger.info("Starting generating streams")
        stream_instances = {
            s.name: s
            for s in self.streams(config, catalog=catalog, state=state)
        }
        logger.info(f"Starting syncing {self.name}")
        self._stream_to_instance_map = stream_instances
        for configured_stream in catalog.streams:
            stream_instance = stream_instances.get(
                configured_stream.stream.name)
            if not stream_instance:
                raise KeyError(
                    f"The requested stream {configured_stream.stream.name} was not found in the source. Available streams: {stream_instances.keys()}"
                )

            try:
                yield from self._read_stream(
                    logger=logger,
                    stream_instance=stream_instance,
                    configured_stream=configured_stream,
                    connector_state=connector_state,
                    internal_config=internal_config,
                )
            except exceptions.HTTPError as error:
                error_data = error.response.json()[0]
                error_code = error_data.get("errorCode")
                if error.response.status_code == codes.FORBIDDEN and error_code == "REQUEST_LIMIT_EXCEEDED":
                    logger.warn(
                        f"API Call limit is exceeded. Error message: '{error_data.get('message')}'"
                    )
                    break  # if got 403 rate limit response, finish the sync with success.
                raise error

            except Exception as e:
                logger.exception(
                    f"Encountered an exception while reading stream {configured_stream.stream.name}"
                )
                raise e

        logger.info(f"Finished syncing {self.name}")
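
The rate-limit branch depends on the shape of Salesforce REST error responses, which arrive as a JSON array of error objects; that is why the handler indexes response.json()[0]. An illustrative (not captured) body for a 403 response:

example_error_body = [
    {"message": "TotalRequests Limit exceeded.", "errorCode": "REQUEST_LIMIT_EXCEEDED"},
]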
Example #6
    def run(self, parsed_args: argparse.Namespace) -> Iterable[str]:
        cmd = parsed_args.command
        if not cmd:
            raise Exception("No command passed")

        # TODO: add try/except for exceptions with different exit codes
        source_spec = self.source.spec(self.logger)

        with tempfile.TemporaryDirectory() as temp_dir:
            if cmd == "spec":
                message = AirbyteMessage(type=Type.SPEC, spec=source_spec)
                yield message.json(exclude_unset=True)
            else:
                raw_config = self.source.read_config(parsed_args.config)
                config = self.source.configure(raw_config, temp_dir)
                # Remove internal flags from config before validating so
                # jsonschema's additionalProperties flag won't fail the validation
                config, internal_config = split_config(config)
                if self.source.check_config_against_spec or cmd == "check":
                    check_config_against_spec_or_exit(config, source_spec,
                                                      self.logger)
                # Put internal flags back to config dict
                config.update(internal_config.dict())

                if cmd == "check":
                    check_result = self.source.check(self.logger, config)
                    if check_result.status == Status.SUCCEEDED:
                        self.logger.info("Check succeeded")
                    else:
                        self.logger.error("Check failed")

                    output_message = AirbyteMessage(
                        type=Type.CONNECTION_STATUS,
                        connectionStatus=check_result).json(exclude_unset=True)
                    yield output_message
                elif cmd == "discover":
                    catalog = self.source.discover(self.logger, config)
                    yield AirbyteMessage(
                        type=Type.CATALOG,
                        catalog=catalog).json(exclude_unset=True)
                elif cmd == "read":
                    config_catalog = self.source.read_catalog(
                        parsed_args.catalog)
                    state = self.source.read_state(parsed_args.state)
                    generator = self.source.read(self.logger, config,
                                                 config_catalog, state)
                    for message in generator:
                        yield message.json(exclude_unset=True)
                else:
                    raise Exception("Unexpected command " + cmd)
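
Unlike Example #1, this older variant merges the internal flags back into the config after validation. A hedged sketch of the split_config contract both entrypoints assume (the key names here are illustrative):

# Underscore-prefixed internal flags are assumed to move into an
# InternalConfig object so strict jsonschema validation still passes.
config, internal_config = split_config({"api_key": "...", "_limit": 100})
# config          -> {"api_key": "..."}
# internal_config -> InternalConfig-like object carrying _limit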