Example No. 1
    def add_period_to_params(self, params):
        """
        Add the time_increment, time_range and/or date_preset keys to parameters.
        - time_increment: available in Ad Insights queries
        - time_range and date_preset: available in Ad Insights queries,
        and in Ad Management queries at the campaign, adset or ad levels only
        """
        if self.ad_insights and self.time_increment:
            params["time_increment"] = self.time_increment

        if self.ad_insights or self.level in ["campaign", "adset", "ad"]:
            if self.start_date and self.end_date:
                logger.info(
                    "Date format used for request: start_date and end_date")
                params["time_range"] = self.create_time_range()
            elif self.date_preset:
                logger.info("Date format used for request: date_preset")
                params["date_preset"] = self.date_preset
            else:
                logger.warning(
                    "No date range provided - Last 30 days by default")
                logger.warning(
                    "https://developers.facebook.com/docs/marketing-api/reference/ad-account/insights#parameters"
                )
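
create_time_range() is not shown in this snippet. Based on how it is used, it presumably formats start_date and end_date into the "time_range" object expected by Ad Insights queries ({"since": ..., "until": ...}). A minimal sketch under that assumption (attribute names taken from the example above):

    def create_time_range(self):
        # Assumed helper: build the {"since", "until"} time_range object
        # from the reader's start_date and end_date attributes.
        return {
            "since": self.start_date.strftime("%Y-%m-%d"),
            "until": self.end_date.strftime("%Y-%m-%d"),
        }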
Example No. 2
    def read(self):
        if self.report_type == "ANALYTICS":
            entity_ids = self.get_active_entity_ids()

            # Ceiling division: number of jobs needed to cover all entity ids
            total_jobs = -(-len(entity_ids) // MAX_ENTITY_IDS_PER_JOB)
            logger.info(f"Processing a total of {total_jobs} jobs")

            data = []
            for chunk_entity_ids in split_list(
                    entity_ids, MAX_ENTITY_IDS_PER_JOB * MAX_CONCURRENT_JOBS):
                job_ids = self.get_job_ids(chunk_entity_ids)
                data += self.get_analytics_report(job_ids)

        elif self.report_type == "REACH":
            data = self.get_reach_report()

        elif self.report_type == "ENTITY":
            if self.entity == "CARD":
                data = self.get_cards_report()
            else:
                data = self.get_campaign_management_report()

        def result_generator():
            for record in data:
                yield self.add_request_or_period_dates(record)

        yield JSONStream("results_" + self.account.id, result_generator())
Example No. 3
    def read(self):

        for prefix in self._prefix_list:

            objects_sorted_by_time = sorted(
                self.list_objects(bucket=self._bucket, prefix=prefix),
                key=lambda o: self.get_timestamp(o),
            )

            for _object in objects_sorted_by_time:

                _object = self.to_object(_object)

                logger.info(
                    f"Found {self._platform} file {self.get_key(_object)}")

                if not self.is_compatible_object(_object):
                    logger.info(
                        f"Wrong extension: Skipping file {self.get_key(_object)}"
                    )
                    continue

                name = self.get_key(_object).split("/",
                                                   self._dest_key_split)[-1]

                yield JSONStream(name, self._result_generator(_object))
Example No. 4
    def get_analytics_report(self, job_ids):
        """
        Get 'ANALYTICS' report through the 'Asynchronous Analytics' endpoint of Twitter Ads API.
        Documentation: https://developer.twitter.com/en/docs/ads/analytics/api-reference/asynchronous
        """

        all_responses = []

        for job_id in job_ids:

            logger.info(f"Processing job_id: {job_id}")

            job_result = self._waiting_for_job_to_complete(job_id)
            raw_analytics_response = self.get_raw_analytics_response(job_result)
            all_responses.append(self.parse(raw_analytics_response))

        return chain(*all_responses)
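
_waiting_for_job_to_complete is not shown here; it presumably polls get_job_result(job_id) with an exponential back-off until the job leaves the "PROCESSING" state. A sketch under that assumption (MAX_WAITING_SEC, JobTimeOutError and sleep are assumed to be available in the module):

    def _waiting_for_job_to_complete(self, job_id):
        # Assumed implementation: poll the job status, doubling the wait
        # between checks, and give up after MAX_WAITING_SEC.
        job_result = self.get_job_result(job_id)
        waiting_sec = 2
        while job_result.status == "PROCESSING":
            logger.info(f"Waiting {waiting_sec} seconds for job to be completed")
            sleep(waiting_sec)
            if waiting_sec > MAX_WAITING_SEC:
                raise JobTimeOutError("Waited too long for job to be completed")
            waiting_sec *= 2
            job_result = self.get_job_result(job_id)
        return job_result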
Example No. 5
    def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed=None):
        """
        Iterating over report pages, parsing them, and returning a list of iterators
        containing dictionary-formatted records: {dimension: value, metric: value}.

        The parent_dim_parsed argument (a dictionary: {dimension: value})
        should be passed if the request includes multiple dimension breakdowns,
        so that we can add their values to output records.
        """
        # Avoid the mutable default argument pitfall
        parent_dim_parsed = parent_dim_parsed or {}

        report_info = {
            "parent_dim": parent_dim_parsed,
            "dim": rep_desc["dimension"].split("variables/")[1],
            "metrics": metrics,
        }
        logger.info(f"Getting report: {report_info}")

        first_response = self.get_report_page(rep_desc)
        all_responses = [
            parse_response(first_response, metrics, parent_dim_parsed)
        ]

        if first_response["totalPages"] > 1:
            for page_nb in range(1, first_response["totalPages"]):
                next_response = self.get_report_page(rep_desc, page_nb)
                all_responses += [
                    parse_response(next_response, metrics, parent_dim_parsed)
                ]

        return chain(*all_responses)
Example No. 6
    def test_format_data(self):
        reader = GoogleDCMReader(**self.kwargs)
        input_report = (row for row in [b"x", b"x", b"Report Fields", b"headers", b"1,2,3", b"4,5,6", b"Grand Total"])
        expected = [{"date": "1", "impressions": "2", "clicks": "3"}, {"date": "4", "impressions": "5", "clicks": "6"}]
        input_list = list(reader.format_response(input_report))
        assert len(input_list) == len(expected)

        logger.info(f"{str(input_list)}\n{str(expected)}")
        for input_row, output in zip(input_list, expected):
            assert input_row == output
Example No. 7
 def __download_sdf(self, operation):
     request = self._client.media().download(
         resourceName=operation["response"]["resourceName"])
     request.uri = request.uri.replace("?alt=json", "?alt=media")
     sdf = io.FileIO(f"{self.BASE}/{self.ARCHIVE_NAME}.zip", mode="wb")
     downloader = MediaIoBaseDownload(sdf, request)
     done = False
     while done is False:
         status, done = downloader.next_chunk()
         logger.info(f"Download {int(status.progress() * 100)}%.")
Example No. 8
 def _wait_for_100_percent_completion(self, async_job):
     async_job.api_get()
     percent_completion = async_job[
         AdReportRun.Field.async_percent_completion]
     status = async_job[AdReportRun.Field.async_status]
     logger.info(f"{status}: {percent_completion}%")
     if status == "Job Failed":
         logger.info(status)
     elif percent_completion < 100:
         raise Exception(f"{status}: {percent_completion}")
Example No. 9
 def __init__(self, name, host, port=6379):
     if host:
         logger.info(f"Using checkpointing service: {host}:{port} ({name})")
         self._enabled = True
         self._name = name
         self._host = host
         self._port = port
         self._client = redis.Redis(host=host, port=port)
     else:
         self._enabled = False
         logger.info("No checkpointing")
Example No. 10
 def _wait_for_query(self, query_id):
     logger.info(f"Waiting for query {query_id} to complete running")
     query_infos = self.get_query(query_id)
     if query_infos["metadata"]["running"] or (
             "googleCloudStoragePathForLatestReport"
             not in query_infos["metadata"]
             and "googleDrivePathForLatestReport"
             not in query_infos["metadata"]):
         raise Exception("Query still running.")
     else:
         return query_infos
Example No. 11
    def _load_access_info(self):
        logger.info("Retrieving Salesforce access token")

        res = requests.post(SALESFORCE_LOGIN_ENDPOINT,
                            params=self._get_login_params())

        res.raise_for_status()

        self._access_token = res.json().get("access_token")
        self._instance_url = res.json().get("instance_url")

        return self._access_token, self._instance_url
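
_get_login_params is not shown. For the Salesforce username-password OAuth flow, it would typically return the grant parameters below; this is a sketch, and the attribute names holding the credentials are assumptions:

    def _get_login_params(self):
        # Assumed: standard OAuth2 username-password grant parameters.
        return {
            "grant_type": "password",
            "client_id": self._client_id,
            "client_secret": self._client_secret,
            "username": self._user,
            "password": self._password,
        }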
Example No. 12
    def __create_sdf_task(self, body):
        """
        Create an SDF asynchronous task of type googleapiclient.discovery.Resource.
            Args:
                body (dict) : request body to describe the data within the generated sdf file.
            Return:
                operation (dict) : contains the task metadata.
        """

        operation = self._client.sdfdownloadtasks().create(body=body).execute()
        logger.info(f"Operation {operation['name']} was created.")
        return operation
Example No. 13
 def _create_report_schedule(self):
     method, endpoint = API_ENDPOINTS["create_report_schedule"]
     payload = {
         "ReportScheduleName": self.report_schedule_name,
         "ReportTemplateId": self.report_template_id,
         "AdvertiserFilters": self.advertiser_ids,
         "ReportStartDateInclusive": self.start_date.isoformat(),
         "ReportEndDateExclusive": self.end_date.isoformat(),
         **DEFAULT_REPORT_SCHEDULE_ARGS,
     }
     logger.info(f"Creating ReportSchedule: {payload}")
     json_response = self._make_api_call(method, endpoint, payload)
     self.report_schedule_id = json_response["ReportScheduleId"]
Example No. 14
    def assert_report_file_ready(self, report_id, file_id):
        """Poke the report file status"""
        report_file = self._service.files().get(reportId=report_id,
                                                fileId=file_id).execute()

        status = report_file["status"]
        if status == "REPORT_AVAILABLE":
            logger.info(f"File status is {status}, ready to download.")
            pass
        elif status != "PROCESSING":
            raise FileNotFoundError(
                f"File status is {status}, processing failed.")
        else:
            raise FileNotFoundError("File status is PROCESSING")
Example No. 15
 def _wait_for_download_url(self):
     report_execution_details = self._get_report_execution_details()
     if report_execution_details["ReportExecutionState"] == "Pending":
         raise ReportScheduleNotReadyError(
             f"ReportSchedule '{self.report_schedule_id}' is still running."
         )
     else:
         # As the ReportSchedule that we just created runs only once,
         # the API response will include only one ReportDelivery (so we can get index "[0]")
         self.download_url = report_execution_details["ReportDeliveries"][
             0]["DownloadURL"]
         logger.info(
             f"ReportScheduleId '{self.report_schedule_id}' is ready. DownloadURL: {self.download_url}"
         )
Example No. 16
    def _create_engine(cls, host, port, user, password, database):
        logger.info(
            f"Connecting to MySQL Database {database} on {host}:{port}")

        url = sqlalchemy.engine.url.URL(
            drivername="mysql+pymysql",
            username=user,
            password=password,
            database=database,
            port=port,
            host=host,
        )

        return sqlalchemy.create_engine(url)
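
A short usage sketch of the engine returned above (class name, credentials and query are placeholders). Note that on SQLAlchemy 1.4+ the URL should be built with sqlalchemy.engine.url.URL.create(...) rather than by calling the constructor directly:

    engine = MySQLReader._create_engine("localhost", 3306, "user", "password", "mydb")
    with engine.connect() as connection:
        for row in connection.execute(sqlalchemy.text("SELECT 1")):
            logger.info(row)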
Example No. 17
 def __get_creatives(self):
     response = self._client.advertisers().creatives().list(
         advertiserId=self.kwargs.get("advertiser_id")).execute()
     if not response:  # no data returned
         return
     else:
         all_creatives = response["creatives"]
         while "nextPageToken" in response:
             token = response["nextPageToken"]
             logger.info(f"Query a new page of creatives. Page id: {token}")
             response = (self._client.advertisers().creatives().list(
                 advertiserId=self.kwargs.get("advertiser_id"),
                 pageToken=token).execute())
             all_creatives.extend(response["creatives"])
     yield from all_creatives
Example No. 18
    def read(self):
        """
        :return: stream that returns Radarly posts one by one
        """
        date_ranges_and_posts_volumes: Dict = self.split_date_range()
        logger.info(
            f"API Compliant Date Ranges and Posts Volumes: {date_ranges_and_posts_volumes}"
        )
        api_compliant_date_ranges = list(date_ranges_and_posts_volumes.keys())

        t0 = time.time()
        ingestion_tracker = []

        for i, date_range in enumerate(api_compliant_date_ranges):

            if self.throttle:
                current_time = time.time() - t0
                ingestion_tracker.append(current_time)
                posts_ingested_over_window = (sum(
                    np.array(ingestion_tracker) > current_time -
                    self.api_window) * self.api_date_period_limit)
                if posts_ingested_over_window > self.throttling_threshold_coefficient * self.api_quarterly_posts_limit:
                    sleep_duration = self.api_window * (
                        self.api_date_period_limit /
                        self.api_quarterly_posts_limit)
                    logger.info(
                        f"Throttling activated: waiting for {sleep_duration} seconds..."
                    )
                    time.sleep(sleep_duration)

            all_publications = self.get_publications_iterator(date_range)
            name = f"""radarly_{date_range[0].strftime("%Y-%m-%d-%H-%M-%S")}_{date_range[1].strftime(
                "%Y-%m-%d-%H-%M-%S")}"""

            def result_generator():
                while True:
                    try:
                        pub = next(all_publications)
                        yield dict(pub)
                    except StopIteration:
                        break
                    except Exception:
                        ex_type, ex, tb = sys.exc_info()
                        logger.warning(
                            f"Failed to ingest post with error: {ex}. Traceback: {traceback.print_tb(tb)}"
                        )

            yield JSONStream(name, result_generator())
Example No. 19
    def query_ad_insights(self, fields, params, object_id):
        """
        Ad Insights documentation:
        https://developers.facebook.com/docs/marketing-api/insights
        """

        logger.info(
            f"Running Facebook Ad Insights query on {self.object_type}_id: {object_id}"
        )

        # Step 1 - Create Facebook object
        obj = self.create_object(object_id)
        # Step 2 - Run Ad Insights query on Facebook object
        report_job = self._get_report(obj, fields, params)

        yield from report_job.get_result()
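
_get_report is not shown in this snippet. With the facebook_business SDK, an asynchronous Ad Insights query is usually started with get_insights(..., is_async=True), which returns the AdReportRun polled in Example No. 8. A sketch under that assumption:

    def _get_report(self, obj, fields, params):
        # Assumed: start an asynchronous Ad Insights job on the Facebook object,
        # then wait (see _wait_for_100_percent_completion) before returning it.
        report_job = obj.get_insights(fields=fields, params=params, is_async=True)
        self._wait_for_100_percent_completion(report_job)
        return report_job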
Example No. 20
        def new_func(*args, **kwargs):

            _kwargs = {}

            for key, value in kwargs.items():
                if key in sensitive_fields:
                    _kwargs[key] = "*****"
                else:
                    _kwargs[key] = value

            logger.info(f"Calling {f.__name__} with ({_kwargs})")

            def processor():
                return f(*args, **kwargs)

            return update_wrapper(processor, f)
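
new_func refers to f and sensitive_fields, which are defined in an enclosing scope not shown here; it reads like the inner function of a logging decorator factory. A plausible (entirely assumed) enclosing shape:

    def log_call(sensitive_fields):
        # Assumed decorator factory: sensitive_fields lists the kwarg names
        # whose values must be masked in the log line.
        def decorator(f):
            def new_func(*args, **kwargs):
                ...  # body as shown in the example above
            return new_func
        return decorator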
Example No. 21
    def add_report_filter(self, report_definition):
        """Check if a filter was provided and contains the necessary information"""
        if not self.report_filter:
            logger.info("No filter provided by user")

        elif all(required_param in self.report_filter.keys()
                 for required_param in ("field", "operator", "values")):
            report_definition["selector"]["predicates"] = {
                "field": self.report_filter["field"],
                "operator": self.report_filter["operator"],
                "values": self.report_filter["values"],
            }
        else:
            raise ClickException(
                "Wrong format for report filter: should be a dictionary passed as a string,\n"
                "with the keys {'field', 'operator', 'values'}")
Example No. 22
    def write(self, stream):
        """
        Write file to disk at location given as parameter.
        """
        file_name = self._file_name or stream.name
        path = os.path.join(self._directory, file_name)

        logger.info(f"Writing stream {file_name} to {path}")
        file = stream.as_file()
        with open(path, "wb") as h:
            while True:
                buffer = file.read(1024)
                if len(buffer) > 0:
                    h.write(buffer)
                else:
                    break
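
The manual read/write loop above is functionally equivalent to shutil.copyfileobj from the standard library; the body of the method could also be written as:

        # Requires "import shutil" at module level.
        logger.info(f"Writing stream {file_name} to {path}")
        with open(path, "wb") as h:
            shutil.copyfileobj(stream.as_file(), h, length=1024)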
Example No. 23
    def log_sampling(report):
        """ Log sampling data if a report has been sampled."""
        data = report.get("data", {})

        if data.get("samplesReadCounts") is not None:
            logger.warning("☝️Report has been sampled.")
            sample_reads = data["samplesReadCounts"][0]
            sample_space = data["samplingSpaceSizes"][0]
            logger.warning(f"sample reads : {sample_reads}")
            logger.warning(f"sample space :{sample_space}")

            logger.warning(
                f"sample percent :{100 * int(sample_reads) / int(sample_space)}%"
            )
        else:
            logger.info("Report is not sampled.")
Example No. 24
    def test_read(self, mock_get_payload, mock_Project, mock_RadarlyApi):
        mock_RadarlyApi.init.side_effect = lambda client_id, client_secret: logger.info(
            "Mock RadarlyApi successfully initiated")
        mock_get_payload.side_effect = create_mock_payload
        mock_project_object = MagicMock()
        mock_project_object.get_all_publications = create_mock_publications_iterator
        mock_Project.find.return_value = mock_project_object

        reader = RadarlyReader(
            pid=1,
            client_id="xxx",
            client_secret="xxx",
            focus_id=(1, 2, 3),
            start_date=datetime(2020, 1, 1),
            end_date=datetime(2020, 1, 1, 3),
            api_request_limit=250,
            api_date_period_limit=10000,
            api_quarterly_posts_limit=45000,
            api_window=300,
            throttle=True,
            throttling_threshold_coefficient=0.95,
        )

        for stream in reader.read():
            line = stream.as_file().readline()
            line = json.loads(line)
            assert "date" in line.keys()
            assert "text" in line.keys()
Example No. 25
    def _run_query(self):
        self.initialize_analyticsreporting()

        response = self._service.searchanalytics().query(
            siteUrl=self.site_url, body=self.build_query()).execute()
        yield response

        # Pagination
        while len(response.get("rows", [])) != 0:
            logger.info(
                f"{len(response.get('rows')) + self.start_row} lines successfully processed..."
            )
            self.start_row += self.row_limit
            response = self._service.searchanalytics().query(
                siteUrl=self.site_url, body=self.build_query()).execute()
            yield response
Example No. 26
def monitor_usage(response):
    """
    Extracts "X-Business-Use-Case-Usage" header from a FacebookResponse object.
    If one of the 3 API usage rates (call_count, total_cputime, total_time)
    is above 75%, puts the program to sleep for 5 minutes.
    Documentation: https://developers.facebook.com/docs/graph-api/overview/rate-limiting/
    """

    usage_rates = []

    for header in response._headers:
        if header["name"] == "X-Business-Use-Case-Usage":
            usage_header = json.loads(header["value"])
            usage_header_values = list(usage_header.values())[0][0]
            usage_rates = [
                v for k, v in usage_header_values.items()
                if k in ["call_count", "total_cputime", "total_time"]
            ]

    if usage_rates and max(usage_rates) > 75:
        logger.info("75% rate limit reached. Sleeping for 5 minutes...")
        sleep(300)
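
For reference, the X-Business-Use-Case-Usage header value is a JSON object keyed by business id, each key mapping to a list of usage entries; the rates extracted above come from a payload shaped roughly like this (illustrative values):

    {
        "<business_id>": [
            {
                "type": "ads_management",
                "call_count": 28,
                "total_cputime": 25,
                "total_time": 30
            }
        ]
    }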
Example No. 27
 def add_period_to_report_definition(self, report_definition):
     """Add Date period from provided start date and end date, when CUSTOM DATE range is called"""
     if self.date_range_type == "CUSTOM_DATE" and (
             not self.start_date or not self.end_date):
         raise NoDateDefinitionException(
             "You must define both start_date and end_date "
             "when using a CUSTOM_DATE range")
     elif self.date_range_type == "CUSTOM_DATE":
         logger.info(
             "Date format used for request : Custom Date Range with start_date and end_date provided"
         )
         report_definition["selector"][
             "dateRange"] = self.create_date_range(self.start_date,
                                                   self.end_date)
     elif self.start_date is not None and self.end_date is not None and self.date_range_type != "CUSTOM_DATE":
         raise InconsistentDateDefinitionException(
             "You must define either the start_date/end_date couple or a date_range "
             "different from CUSTOM_DATE, but not both")
Example No. 28
 def __wait_sdf_download_request(self, operation):
     """
     Wait for an SDF task to be completed (i.e. file ready for download).
         Args:
             operation (dict): task metadata
         Returns:
             operation (dict): task metadata updated with resource location.
     """
     logger.info(
         f"waiting for SDF operation: {operation['name']} to complete running."
     )
     get_request = self._client.sdfdownloadtasks().operations().get(
         name=operation["name"])
     operation = get_request.execute()
     if "done" not in operation:
         raise RetryTimeoutError(
             "The operation has taken more than 10 hours to complete.\n")
     return operation
Example No. 29
    def query(self, query):

        logger.info(f"Running Salesforce query: {query}")

        response = self._request_data(SALESFORCE_QUERY_ENDPOINT, {"q": query})

        generating = True

        while generating:

            for rec in response["records"]:
                yield rec

            if "nextRecordsUrl" in response:
                logger.info("Fetching next page of Salesforce results")
                response = self._request_data(response["nextRecordsUrl"])
            else:
                generating = False
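
_request_data is not shown. It presumably performs an authenticated GET against the instance URL obtained in Example No. 11, passing the access token as a Bearer header. A sketch under that assumption:

    def _request_data(self, endpoint, params=None):
        # Assumed helper: authenticated GET against the Salesforce instance.
        headers = {"Authorization": f"Bearer {self._access_token}"}
        response = requests.get(
            self._instance_url + endpoint, headers=headers, params=params)
        response.raise_for_status()
        return response.json()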
Example No. 30
    def assert_report_file_ready(self, report_id):
        """Poll the API with the reportId until the report is ready, up to 100 times.

        Args:
          report_id: The ID SA360 has assigned to a report.
        """
        request = self._service.reports().get(reportId=report_id)
        report_data = request.execute()
        if report_data["isReportReady"]:
            logger.info("The report is ready.")

            # For large reports, SA360 automatically fragments the report into multiple
            # files. The 'files' property in the JSON object that SA360 returns contains
            # the list of URLs for each file fragment. To download a report, SA360 needs to
            # know the report ID and the index of a file fragment.
            return report_data
        else:
            logger.info("Report is not ready. Retrying...")
            raise FileNotFoundError