Exemplo n.º 1
0
    def __call__(self, *args, **kwargs) -> Iterator[ExportContext]:
        """Read a cell range from a Google Sheets page and yield it once.

        The export policy is rendered with ``self.model_templating`` from
        ``self.notebook.export`` and stored on ``self.export``. The range
        ``self.export.start``..``self.export.end`` of the page whose id is
        ``self.export.page_id`` is then read column-wise.

        Args:
            *args: Forwarded to ``self.model_templating``.
            **kwargs: Accepted for interface compatibility; not used here.

        Yields:
            A single ``ExportContext`` with column-oriented data and the
            sheet/page identifiers, titles and URL in ``request_kwargs``.

        Raises:
            AuthError: If a google-auth error is raised while authorizing
                or while reading the sheet.
            PermissionError: If the API answers with HTTP status 403.
            googleapiclient.errors.HttpError: For any other HTTP error.
        """
        import pygsheets
        from google.auth.exceptions import (
            RefreshError,
            OAuthError,
            GoogleAuthError,
            UserAccessTokenError,
        )
        from googleapiclient import errors
        from pygsheets import Worksheet

        self.logger.info("Exportation data")

        self.export: "GoogleSheetsExportPolicy" = self.model_templating(
            *args, model=self.notebook.export)
        try:
            # Authorization is inside the try block so that credential
            # errors raised here are translated to AuthError as well.
            api = pygsheets.authorize(**self.export.credentials.dict())
            sheet = api.open_by_key(self.export.sheet_id)
            page: Worksheet = sheet.worksheet(property="id",
                                              value=self.export.page_id)
            data = page.get_values(
                start=self.export.start,
                end=self.export.end,
                returnas="matrix",
                majdim="COLUMNS",
                include_tailing_empty=False,
                include_tailing_empty_rows=False,
            )
        except (RefreshError, OAuthError, GoogleAuthError,
                UserAccessTokenError) as exc:
            raise AuthError(exc) from exc

        except errors.HttpError as exc:
            if exc.resp.status == 403:
                raise PermissionError(exc) from exc
            raise

        column_names, data = self.drop_extra_columns(data)
        data = self.fill_empty_cells(data)

        yield ExportContext(
            columns=column_names,
            data=data,
            data_orient=DataOrient.columns,
            request_kwargs={
                "sheet_id": self.export.sheet_id,
                "sheet_name": sheet.title,
                "sheet_url": sheet.url,
                "page_id": self.export.page_id,
                "page_name": page.title,
            },
        )
Exemplo n.º 2
0
    def __call__(self, start_period: dt.datetime, end_period: dt.datetime,
                 **kwargs) -> Iterator[ExportContext]:
        """Request a stats report for the period and yield it page by page.

        Args:
            start_period: Start of the period; passed to
                ``self.collect_params``.
            end_period: End of the period; passed to
                ``self.collect_params``.
            **kwargs: Accepted for interface compatibility; not used here.

        Yields:
            One ``ExportContext`` with column-oriented data for every page
            produced by ``report().pages()``.

        Raises:
            AuthError: If ``YandexMetrikaTokenError`` is raised; the
                original error is attached as the cause.
        """
        # The try block also covers the page iteration, so a token error
        # raised while iterating pages is translated too.
        try:
            params = self.collect_params(start_period, end_period)

            self.logger.info("Exportation data")
            report = self.client.stats().get(params=params)
            for result in report().pages():
                self.logger.info("Iter export data")

                yield ExportContext(
                    export_kwargs=result().request_kwargs,
                    columns=report.columns,
                    data=result().to_columns(),
                    data_orient=DataOrient.columns,
                )

        except YandexMetrikaTokenError as ex:
            raise AuthError(ex) from ex
Exemplo n.º 3
0
    def __call__(self, start_period: dt.datetime, end_period: dt.datetime,
                 **kwargs) -> Iterator[ExportContext]:
        """Call the configured client resource once per parameter set.

        ``self.collect_params`` supplies pairs of (URL params, GET params).
        For each pair the resource named by ``self.resource`` is requested
        and the response data is passed through
        ``self.processing_response_data``.

        Args:
            start_period: Start of the period; passed to
                ``self.collect_params``.
            end_period: End of the period; passed to
                ``self.collect_params``.
            **kwargs: Accepted for interface compatibility; not used here.

        Yields:
            One ``ExportContext`` with dict-oriented data per parameter
            pair.

        Raises:
            AuthError: If ``YandexMetrikaTokenError`` is raised; the
                original error is attached as the cause.
        """
        for url_params, get_params in self.collect_params(
                start_period, end_period):
            method = getattr(self.client, self.resource)
            try:
                self.logger.info("Exportation data")
                result = method(**url_params).get(params=get_params)

                data = self.processing_response_data(self.resource,
                                                     result().data,
                                                     self.columns)
                yield ExportContext(
                    export_kwargs=result().request_kwargs,
                    columns=self.columns,
                    data=data,
                    data_orient=DataOrient.dict,
                )

            except YandexMetrikaTokenError as ex:
                raise AuthError(ex) from ex
Exemplo n.º 4
0
    def __call__(self, start_period: dt.datetime, end_period: dt.datetime,
                 **kwargs) -> Iterator[Union[ExportContext, SleepIteration]]:
        """Export data from the configured Yandex Direct resource.

        The generator interleaves two kinds of items: ``ExportContext``
        objects carrying data and ``SleepIteration`` objects that ask the
        consumer to wait before the generator is resumed.

        Two modes are handled, selected by ``self.export.resource``:

        * ``"reports"`` - the request is re-posted (after a 10 s sleep
          request) while the response status code is 201 or 202; the final
          response is yielded as one value-oriented ``ExportContext``.
        * any other resource - the response is paginated with
          ``result().pages()`` and each page is yielded as a dict-oriented
          ``ExportContext``.

        Args:
            start_period: Start of the period; passed to
                ``self.collect_params``.
            end_period: End of the period; passed to
                ``self.collect_params``.
            **kwargs: Only ``max_pages`` is read; it is forwarded to
                ``result().pages()``.

        Yields:
            ``ExportContext`` with exported data, or ``SleepIteration``
            when the request should be retried later.

        Raises:
            AuthError: If ``YandexDirectTokenError`` is raised; the
                original error is attached as the cause.
            YandexDirectClientError: For error codes other than 52, 1000,
                1001, 1002, or once the retry budget is exhausted.
            ConnectionError: Once the retry budget is exhausted.
        """
        from tapi_yandex_direct import exceptions

        self.logger.info("Exportation data")

        # `result` is reset to None to force a new POST on the next pass;
        # `page_iterator` is created once and advanced on every pass.
        result = None
        page_iterator = None
        # One retry budget shared by client-error and connection-error
        # retries.
        api_error_retries = 10
        while True:
            try:
                if result is None:
                    body = self.collect_params(start_period, end_period)
                    method = getattr(self.client, self.export.resource)
                    result = method().post(data=body)

                if self.export.resource != "reports":

                    if page_iterator is None:
                        page_iterator = result().pages(
                            max_pages=kwargs.get("max_pages"))

                    # NOTE(review): if pages() is a generator, an exception
                    # raised inside it finishes the generator, so a retry
                    # would hit StopIteration and end the export early -
                    # confirm against the tapi pagination implementation.
                    page = next(page_iterator)

            except exceptions.YandexDirectTokenError as exc:
                raise AuthError(exc) from exc

            except exceptions.YandexDirectNotEnoughUnitsError:
                yield SleepIteration(sleep=60 * 5)
                continue

            except exceptions.YandexDirectRequestsLimitError:
                yield SleepIteration(sleep=10)
                continue

            except exceptions.YandexDirectClientError as exc:
                # Presumably transient server-side error codes - verify
                # against the Yandex Direct API error reference.
                if api_error_retries and exc.error_code in (52, 1000, 1001,
                                                            1002):
                    api_error_retries -= 1
                    yield SleepIteration(sleep=10)
                    continue
                raise

            except ConnectionError:
                if api_error_retries:
                    api_error_retries -= 1
                    yield SleepIteration(sleep=10)
                    continue
                raise

            except StopIteration:
                # The page iterator is exhausted: the export is complete.
                break

            else:
                if self.export.resource == "reports":
                    if result.status_code in (201, 202):
                        # Presumably the report is not ready yet; drop the
                        # response so that the request is posted again.
                        result = None
                        yield SleepIteration(sleep=10)
                        continue

                    data = result().to_values()

                    yield ExportContext(
                        request_kwargs=result.request_kwargs,
                        columns=result.columns,
                        data=data,
                        data_orient=DataOrient.values,
                    )

                    break
                else:
                    self.logger.info("Iter export data")

                    # Column names come from the first row; an empty page
                    # yields an empty column list.
                    columns = []
                    if page.data:
                        columns = sorted(page.data[0].keys())

                    yield ExportContext(
                        request_kwargs=page.request_kwargs,
                        columns=columns,
                        data=page.data,
                        data_orient=DataOrient.dict,
                    )
Exemplo n.º 5
0
    def __call__(
        self,
        start_period: dt.datetime,
        end_period: dt.datetime,
        dry_run=False,
        **kwargs,
    ) -> Iterator[Union[ExportContext, SleepIteration]]:
        """Create (or reuse), wait for, download and clean a log report.

        Steps:

        1. ``self.search_identic_report`` is asked for an existing request
           id. When it returns ``None``, the report is evaluated and, as
           soon as the evaluation says it is possible, created.
        2. The request status is polled until it is ``"processed"``.
        3. The report is downloaded and yielded part by part.
        4. Unless ``dry_run`` is true, the request is cleaned afterwards.

        Args:
            start_period: Start of the period; passed to
                ``self.collect_params`` and ``self.search_identic_report``.
            end_period: End of the period; passed to the same helpers.
            dry_run: When ``True``: a full report store raises instead of
                waiting, each polling pass additionally blocks for 10 s
                before the ``SleepIteration`` is yielded, and the final
                clean request is skipped.
            **kwargs: Accepted for interface compatibility; not used here.

        Yields:
            ``SleepIteration`` while waiting, then one column-oriented
            ``ExportContext`` per report part.

        Raises:
            AuthError: If ``YandexMetrikaTokenError`` is raised; the
                original error is attached as the cause.
            Exception: If the report store is full during a dry run, or if
                the request status contains ``"cleaned"``.
        """
        from tapi_yandex_metrika.exceptions import YandexMetrikaTokenError

        try:
            params = self.collect_params(start_period, end_period)

            self.logger.info("Exportation data")

            request_id = self.search_identic_report(start_period, end_period,
                                                    params)
            if request_id is None:
                while True:
                    # Evaluate report.
                    result = self.client.evaluate().get(params=params)

                    if result["log_request_evaluation"]["possible"] is False:
                        sleeptime = 60 * 5
                        self.logger.info(
                            "The report store is full. "
                            f"Waiting for when to be free {sleeptime} sec.")
                        if dry_run is True:
                            raise Exception("The report store is full")

                        yield SleepIteration(sleep=sleeptime)
                    else:
                        # Create report.
                        result = self.client.create().post(params=params)
                        request_id = result["log_request"]["request_id"]
                        break

            # Wait report. `repeat_number` counts polling passes and is
            # the input of self.sleeptime().
            repeat_number = 1
            while True:
                result = self.client.info(requestId=request_id).get()
                status = result["log_request"]["status"]

                if status == "processed":
                    self.logger.info("Download report")
                    report = self.client.download(requestId=request_id).get()
                    break
                elif "cleaned" in status:
                    raise Exception(
                        "The report does not exist, it has been cleared. "
                        "Curent report status is '{}'".format(status))
                else:
                    sleeptime = self.sleeptime(repeat_number)
                    self.logger.info("Wait report {} sec.", sleeptime)
                    if dry_run is True:
                        time.sleep(10)
                    yield SleepIteration(sleep=sleeptime)

                repeat_number += 1

            # Download report.
            for part in report().parts():
                self.logger.info("Iter export data")

                yield ExportContext(
                    request_kwargs=part().request_kwargs,
                    columns=report.columns,
                    data=part().to_columns(),
                    data_orient=DataOrient.columns,
                )

            if dry_run is not True:
                self.client.clean(requestId=request_id).post()

        except YandexMetrikaTokenError as ex:
            raise AuthError(ex) from ex
Exemplo n.º 6
0
    def __call__(self, start_period: dt.datetime, end_period: dt.datetime,
                 **kwargs) -> Iterator[Union[ExportContext, SleepIteration]]:
        """Request a Criteo adset statistics report and yield its rows.

        The report is requested in JSON format for the dimensions, metrics
        and currency configured in ``self.export.params``. The ``"Rows"``
        of the response are yielded either in one ``ExportContext`` or, if
        ``self.export.chunk_size`` is set, in chunks of that size.

        Args:
            start_period: Start of the period; only its date part is sent.
            end_period: End of the period; only its date part is sent.
            **kwargs: Accepted for interface compatibility; not used here.

        Yields:
            ``ExportContext`` with dict-oriented rows, or
            ``SleepIteration`` before a retry.

        Raises:
            AuthError: On HTTP 401 or when the error body reports
                ``credentials_no_longer_supported``.
            ForbiddenError: On HTTP 403.
            ApiException: For any other API error, or once retries are
                exhausted.
            Exception: If the response code is not 200 and no retries are
                left.
        """
        import criteo_marketing_transition as cm
        from criteo_marketing_transition.rest import ApiException
        from flowmaster.operators.etl import DataOrient

        self.logger.info("Exportation data")

        columns = self.export.params.dimensions + self.export.params.metrics
        stats_query_message = cm.StatisticsReportQueryMessage(
            dimensions=self.export.params.dimensions,
            metrics=self.export.params.metrics,
            start_date=start_period.date().isoformat(),
            end_date=end_period.date().isoformat(),
            currency=self.export.params.currency,
            format="json",
        )

        iter_num = 0
        # NOTE(review): with a budget of 0 every retry branch below is
        # skipped and the first failure is raised - confirm whether
        # retries are meant to be disabled.
        api_error_retries = 0
        while True:
            self.logger.info("Iter export data")
            iter_num += 1
            try:
                (
                    response_content,
                    http_code,
                    response_headers,
                ) = self.analytics_api.get_adset_report_with_http_info(
                    statistics_report_query_message=stats_query_message,
                    async_req=True,
                    _preload_content=False,
                ).get()

            except ApiException as exc:
                # The body may be missing or a raw JSON payload rather
                # than a dict; extract the error code defensively so the
                # original ApiException is never masked by an
                # AttributeError.
                error_body = exc.body
                if isinstance(error_body, (bytes, str)):
                    try:
                        error_body = orjson.loads(error_body)
                    except ValueError:
                        error_body = None
                error_code = (error_body.get("error")
                              if isinstance(error_body, dict) else None)

                if (exc.status == 401
                        or error_code == "credentials_no_longer_supported"):
                    raise AuthError(exc) from exc

                if exc.status == 403:
                    raise ForbiddenError(exc) from exc

                if exc.status == 429:
                    if api_error_retries:
                        api_error_retries -= 1
                        # https://developers.criteo.com/marketing-solutions/docs/requesting-a-report
                        yield SleepIteration(sleep=60)
                        continue

                if exc.status in (500, 503):
                    # https://developers.criteo.com/marketing-solutions/docs/how-to-handle-api-errors
                    if api_error_retries:
                        api_error_retries -= 1
                        # The wait grows with the number of attempts.
                        yield SleepIteration(sleep=iter_num * 20)
                        continue

                raise
            else:
                if http_code == 200:
                    if response_content:
                        response_content: HTTPResponse
                        content: str = response_content.read().decode(
                            "utf-8-sig")
                        rows = orjson.loads(content)["Rows"]

                        # Without a chunk size all rows go out in a single
                        # ExportContext.
                        if self.export.chunk_size:
                            chunks = chunker(rows,
                                             size=self.export.chunk_size)
                        else:
                            chunks = [rows]

                        for chunk in chunks:
                            yield ExportContext(
                                columns=columns,
                                data=chunk,
                                data_orient=DataOrient.dict,
                                response_kwargs={
                                    "headers": response_headers
                                },
                            )
                    else:
                        self.logger.warning("Didn't receive data")

                    break
                else:
                    if api_error_retries:
                        api_error_retries -= 1
                        yield SleepIteration(sleep=10)
                        # Retry the request instead of falling through to
                        # the error below.
                        continue

                    raise Exception(
                        f"CriteoError: code={http_code}, headers={response_headers}\n"
                        f"response_content={response_content}")