Example No. 1
    def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
        client = GoogleSheetsClient(self.get_credentials(config))
        spreadsheet_id = config["spreadsheet_id"]
        try:
            logger.info(f"Running discovery on sheet {spreadsheet_id}")
            spreadsheet_metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False))
            grid_sheets = Helpers.get_grid_sheets(spreadsheet_metadata)
            streams = []
            for sheet_name in grid_sheets:
                try:
                    header_row_data = Helpers.get_first_row(client, spreadsheet_id, sheet_name)
                    stream = Helpers.headers_to_airbyte_stream(logger, sheet_name, header_row_data)
                    streams.append(stream)
                except Exception as err:
                    if str(err).startswith("Expected data for exactly one row for sheet"):
                        logger.warn(f"Skip empty sheet: {sheet_name}")
                    else:
                        logger.error(str(err))
            return AirbyteCatalog(streams=streams)

        except errors.HttpError as err:
            reason = str(err)
            if err.resp.status == status_codes.NOT_FOUND:
                reason = "Requested spreadsheet was not found."
            raise Exception(f"Could not run discovery: {reason}")
Example No. 2
    def headers_to_airbyte_stream(
            logger: AirbyteLogger, sheet_name: str,
            header_row_values: List[str]) -> AirbyteStream:
        """
        Parses sheet headers from the provided row. This method assumes that data is contiguous
        i.e: every cell contains a value and the first cell which does not contain a value denotes the end
        of the headers. For example, if the first row contains "One | Two | | Three" then this method
        will parse the headers as ["One", "Two"]. This assumption is made for simplicity and can be modified later.
        """
        fields, duplicate_fields = Helpers.get_valid_headers_and_duplicates(
            header_row_values)
        if duplicate_fields:
            logger.warn(f"Duplicate headers found in {sheet_name}. Ignoring them: {duplicate_fields}")

        sheet_json_schema = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            # For simplicity, the type of every cell is a string
            "properties": {field: {"type": "string"} for field in fields},
        }

        return AirbyteStream(name=sheet_name, json_schema=sheet_json_schema)
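As a self-contained illustration of the mapping above, here is a minimal sketch of going from a header row to the JSON schema. Stopping at the first empty cell and dropping duplicate headers mirrors the docstring, but the exact behavior of Helpers.get_valid_headers_and_duplicates is an assumption here.

# Illustration only: a standalone version of the header-to-schema mapping.
# Truncation at the first empty cell and the duplicate handling are assumptions
# based on the docstring above, not the actual Helpers implementation.
from collections import Counter
from typing import Dict, List

def headers_to_json_schema(header_row_values: List[str]) -> Dict:
    headers: List[str] = []
    for value in header_row_values:
        if not value:  # the first empty cell ends the headers
            break
        headers.append(value)
    counts = Counter(headers)
    fields = [h for h in headers if counts[h] == 1]  # duplicated headers are dropped
    return {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "properties": {field: {"type": "string"} for field in fields},
    }

# headers_to_json_schema(["One", "Two", "", "Three"]) keeps only "One" and "Two".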
Example No. 3
    def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
        # Check involves verifying that the specified spreadsheet is reachable with our credentials.
        try:
            client = GoogleSheetsClient(self.get_credentials(config))
        except Exception as e:
            return AirbyteConnectionStatus(status=Status.FAILED, message=f"Please use valid credentials json file. Error: {e}")

        spreadsheet_id = config["spreadsheet_id"]

        try:
            # Attempt to get first row of sheet
            client.get(spreadsheetId=spreadsheet_id, includeGridData=False, ranges="1:1")
        except errors.HttpError as err:
            reason = str(err)
            # Give a clearer message if it's a common error like 404.
            if err.resp.status == status_codes.NOT_FOUND:
                reason = "Requested spreadsheet was not found."
            logger.error(f"Formatted error: {reason}")
            return AirbyteConnectionStatus(
                status=Status.FAILED, message=f"Unable to connect with the provided credentials to spreadsheet. Error: {reason}"
            )

        # Check for duplicate headers
        spreadsheet_metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False))

        grid_sheets = Helpers.get_grid_sheets(spreadsheet_metadata)

        duplicate_headers_in_sheet = {}
        for sheet_name in grid_sheets:
            try:
                header_row_data = Helpers.get_first_row(client, spreadsheet_id, sheet_name)
                _, duplicate_headers = Helpers.get_valid_headers_and_duplicates(header_row_data)
                if duplicate_headers:
                    duplicate_headers_in_sheet[sheet_name] = duplicate_headers
            except Exception as err:
                if str(err).startswith("Expected data for exactly one row for sheet"):
                    logger.warn(f"Skip empty sheet: {sheet_name}")
                else:
                    logger.error(str(err))
                    return AirbyteConnectionStatus(
                        status=Status.FAILED, message=f"Unable to read the schema of sheet {sheet_name}. Error: {str(err)}"
                    )
        if duplicate_headers_in_sheet:
            duplicate_headers_error_message = ", ".join(
                [
                    f"[sheet:{sheet_name}, headers:{duplicate_sheet_headers}]"
                    for sheet_name, duplicate_sheet_headers in duplicate_headers_in_sheet.items()
                ]
            )
            return AirbyteConnectionStatus(
                status=Status.FAILED,
                message="The following duplicate headers were found in the following sheets. Please fix them to continue: "
                + duplicate_headers_error_message,
            )

        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
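For clarity, here is how the duplicate-header failure message at the end is assembled for a hypothetical result with duplicates in two sheets (sheet and header names are made up):

# Illustration with made-up sheet and header names of the failure message format above.
duplicate_headers_in_sheet = {"Orders": ["id"], "Customers": ["email"]}
duplicate_headers_error_message = ", ".join(
    f"[sheet:{sheet_name}, headers:{duplicate_sheet_headers}]"
    for sheet_name, duplicate_sheet_headers in duplicate_headers_in_sheet.items()
)
# -> "[sheet:Orders, headers:['id']], [sheet:Customers, headers:['email']]"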
Example No. 4
    def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
        """
        Tests whether the input configuration can be used to successfully connect to the integration,
            e.g. whether a provided Stripe API token can be used to connect to the Stripe API.

        :param logger: Logging object used to write debug/info/error messages to the logs
            (logs will not be accessible via the Airbyte UI if they are not passed to this logger)
        :param config_path: Path to the file containing the JSON configuration
        :param config: JSON object containing the configuration of this source; its content is as specified in
        the properties of the spec.json file

        :return: AirbyteConnectionStatus indicating a Success or Failure
        """
        try:
            # If an app on the appstore does not support subscriptions or sales, it cannot pull the relevant reports.
            # However, the way the Appstore API expresses this is not via clear error messages. Instead it expresses it by throwing an unrelated
            # error, in this case "invalid vendor ID". There is no way to distinguish if this error is due to invalid credentials or due to
            # the account not supporting this kind of report. So to "check connection" we see if any of the reports can be pulled and if so
            # return success. If no reports can be pulled we display the exception messages generated for all reports and return failure.
            api_fields_to_test = {
                "subscription_event_report": {
                    "reportType": "SUBSCRIPTION_EVENT",
                    "frequency": "DAILY",
                    "reportSubType": "SUMMARY",
                    "version": "1_2",
                },
                "subscriber_report": {"reportType": "SUBSCRIBER", "frequency": "DAILY", "reportSubType": "DETAILED", "version": "1_2"},
                "subscription_report": {"reportType": "SUBSCRIPTION", "frequency": "DAILY", "reportSubType": "SUMMARY", "version": "1_2"},
                "sales_report": {"reportType": "SALES", "frequency": "DAILY", "reportSubType": "SUMMARY", "version": "1_0"},
            }

            api = Api(config["key_id"], config["key_file"], config["issuer_id"])
            stream_to_error = {}
            for stream, params in api_fields_to_test.items():
                test_date = date.today() - timedelta(days=2)
                report_filters = {"reportDate": test_date.strftime("%Y-%m-%d"), "vendorNumber": f"{config['vendor']}"}
                report_filters.update(params)
                try:
                    rep_tsv = api.download_sales_and_trends_reports(filters=report_filters)
                    if isinstance(rep_tsv, dict):
                        raise Exception(f"An exception occurred: Received a JSON response instead of" f" the report: {str(rep_tsv)}")
                except Exception as e:
                    logger.warn(f"Unable to download {stream}: {e}")
                    stream_to_error[stream] = e

            # All streams have failed
            if len(stream_to_error) == len(api_fields_to_test):
                message = "\n".join([f"Unable to access {stream} due to error: {e}" for stream, e in stream_to_error.items()])
                return AirbyteConnectionStatus(status=Status.FAILED, message=message)

            return AirbyteConnectionStatus(status=Status.SUCCEEDED)
        except Exception as e:
            logger.warn(e)
            return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {str(e)}")
Example No. 5
    def check_config(self, logger: AirbyteLogger, config_path: str,
                     config: json) -> AirbyteConnectionStatus:
        """
        Tests whether the input configuration can be used to successfully connect to the integration,
            e.g. whether a provided Stripe API token can be used to connect to the Stripe API.

        :param logger: Logging object used to write debug/info/error messages to the logs
            (logs will not be accessible via the Airbyte UI if they are not passed to this logger)
        :param config_path: Path to the file containing the JSON configuration
        :param config: JSON object containing the configuration of this source; its content is as specified in
        the properties of the spec.json file

        :return: AirbyteConnectionStatus indicating a Success or Failure
        """
        try:
            # create request fields for testing
            api_fields_to_test = {
                "subscription_report": {
                    "reportType": "SUBSCRIPTION",
                    "frequency": "DAILY",
                    "reportSubType": "SUMMARY",
                    "version": "1_2"
                }
            }
            test_date = date.today() - timedelta(days=2)
            report_filters = {
                "reportDate": test_date.strftime("%Y-%m-%d"),
                "vendorNumber": "{}".format(config["vendor"])
            }

            report_filters.update(api_fields_to_test["subscription_report"])

            # fetch data from appstore api
            api = Api(config["key_id"], config["key_file"],
                      config["issuer_id"])

            rep_tsv = api.download_sales_and_trends_reports(
                filters=report_filters)

            if isinstance(rep_tsv, dict):
                return AirbyteConnectionStatus(
                    status=Status.FAILED,
                    message=f"An exception occurred: Received a JSON response instead of the report: {rep_tsv}",
                )

            return AirbyteConnectionStatus(status=Status.SUCCEEDED)
        except Exception as e:
            logger.warn(e)
            return AirbyteConnectionStatus(
                status=Status.FAILED,
                message=f"An exception occurred: {str(e)}")
Example No. 6
    def read(
        self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
    ) -> Generator[AirbyteMessage, None, None]:
        client = self._client(config)

        logger.info("Starting syncing sendgrid")
        for configured_stream in catalog.streams:
            # TODO handle incremental syncs
            stream = configured_stream.stream
            if stream.name not in client.ENTITY_MAP:
                logger.warn(f"Stream '{stream.name}' not found in the recognized entities")
                continue
            for record in self._read_record(client=client, stream=stream.name):
                yield AirbyteMessage(type=Type.RECORD, record=record)

        logger.info("Finished syncing sendgrid")
Example No. 7
    def read(self,
             logger: AirbyteLogger,
             config_container: ConfigContainer,
             catalog_path,
             state=None) -> Generator[AirbyteMessage, None, None]:
        client = self._client(config_container)

        catalog_config = self.read_config(catalog_path)
        catalog = ConfiguredAirbyteCatalog.parse_obj(catalog_config)

        logger.info("Starting syncing recurly")
        for configured_stream in catalog.streams:
            # TODO handle incremental syncs
            stream = configured_stream.stream
            if stream.name not in client.ENTITIES:
                logger.warn(f"Stream '{stream.name}' not found in the recognized entities")
                continue
            for record in self._read_record(client=client, stream=stream.name):
                yield AirbyteMessage(type=Type.RECORD, record=record)

        logger.info("Finished syncing recurly")