def collect_page(job_scope: JobScope, _job_context: JobContext):
    """
    Collect a single facebook page
    """
    if job_scope.report_variant != Entity.Page:
        raise ValueError(f"Report level {job_scope.report_variant} specified is not: {Entity.Page}")

    token = job_scope.token
    if not token:
        raise ValueError(f"Job {job_scope.job_id} cannot proceed. No platform tokens provided.")

    # We don't use it for getting a token. Something else that calls us does.
    # However, we use it to report usages of the token we got.
    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        page_inst = page.Page(fbid=job_scope.entity_id, api=fb_ctx.api)
        page_fetched = page_inst.api_get(fields=get_default_fields(Page))

        report_job_status_task.delay(ExternalPlatformJobStatus.DataFetched, job_scope)
        token_manager.report_usage(token, 2)

        record_id_data = job_scope.to_dict()
        record_id_data.update(entity_type=Entity.Page, entity_id=job_scope.entity_id, report_variant=None)

        entity_data = page_fetched.export_all_data()
        entity_data = add_vendor_data(entity_data, id=generate_universal_id(**record_id_data))

        store = NormalStore(job_scope)
        store.store(entity_data)
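
# Illustrative usage sketch only. A JobScope would normally arrive from the
# task runner; the constructor shape below mirrors the tests at the bottom of
# this section, and the field values are assumptions:
#
#   job_scope = JobScope(
#       sweep_id='sweep-0001',
#       namespace='fb',
#       entity_id='<page fbid>',
#       report_variant=Entity.Page,
#   )
#   collect_page(job_scope, JobContext())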
def iter_collect_entities_per_page_post(job_scope: JobScope) -> Generator[Dict[str, Any], None, None]:
    """
    Collects an arbitrary entity for a page post
    """
    entity_type = job_scope.report_variant

    page_token_manager = PageTokenManager.from_job_scope(job_scope)
    with PlatformApiContext(page_token_manager.get_best_token(job_scope.ad_account_id)) as fb_ctx:
        root_fb_entity = fb_ctx.to_fb_model(job_scope.entity_id, Entity.PagePost)

        entities = iter_native_entities_per_page_post(root_fb_entity, entity_type)

        record_id_base_data = job_scope.to_dict()
        record_id_base_data.update(entity_type=entity_type, report_variant=None)
        del record_id_base_data['entity_id']

        with ChunkDumpStore(job_scope, chunk_size=DEFAULT_CHUNK_SIZE) as store:
            for entity in entities:
                entity_data = entity.export_all_data()
                entity_data = add_vendor_data(
                    entity_data,
                    id=generate_universal_id(entity_id=entity_data.get('id'), **record_id_base_data),
                )
                entity_data['page_id'] = job_scope.ad_account_id
                entity_data['page_post_id'] = job_scope.entity_id

                # Store the individual datum; the cold store divines whatever
                # it needs from the job context.
                store(entity_data)

                yield entity_data
def collect_pages_from_business(job_scope: JobScope, _job_context: JobContext) -> int:
    """
    Collect all facebook pages that are active
    """
    if job_scope.report_variant != Entity.Page:
        raise ValueError(f"Report level {job_scope.report_variant} specified is not: {Entity.Page}")

    token = job_scope.token
    if not token:
        raise ValueError(f"Job {job_scope.job_id} cannot proceed. No platform tokens provided.")

    # We don't use it for getting a token. Something else that calls us does.
    # However, we use it to report usages of the token we got.
    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        fb_req = FacebookRequest(
            node_id="me",
            method="GET",
            endpoint="/businesses",
            api=fb_ctx.api,
            api_type='EDGE',
            target_class=Business,
        )
        businesses = fb_req.execute()

        report_job_status_task.delay(ExternalPlatformJobStatus.DataFetched, job_scope)
        token_manager.report_usage(token)

        entity_type = Entity.Page

        record_id_base_data = job_scope.to_dict()
        record_id_base_data.update(entity_type=entity_type, report_variant=None)

        cnt = 0
        for biz in businesses:
            client_pages = list(biz.get_client_pages(fields=get_default_fields(Page)))
            owned_pages = list(biz.get_owned_pages(fields=get_default_fields(Page)))
            pages_list = client_pages + owned_pages

            for page_inst in pages_list:
                entity_data = page_inst.export_all_data()
                record_id_base_data.update(entity_id=entity_data.get('id'))
                entity_data = add_vendor_data(entity_data, id=generate_universal_id(**record_id_base_data))

                store = NormalStore(job_scope)
                store.store(entity_data)
                cnt += 1

        report_job_status_task.delay(ExternalPlatformJobStatus.Done, job_scope)
        return cnt
def collect_adaccount(job_scope: JobScope) -> Dict[str, Any]:
    """
    Collects ad account data for an ad-account-specific JobScope definition.

    :param JobScope job_scope: The JobScope as we get it from the task itself
    """
    if job_scope.report_variant != Entity.AdAccount:
        raise ValueError(f"Report level {job_scope.report_variant} specified is not: {Entity.AdAccount}")

    token = job_scope.token
    if not token:
        raise ValueError(f"Job {job_scope.job_id} cannot proceed. No platform tokens provided.")

    assert (
        job_scope.ad_account_id == job_scope.entity_id
    ), 'This is an ad account entity job, account_id should be equal to entity_id'

    # Used to report token usage by this job
    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        ad_account = fb_ctx.to_fb_model(job_scope.ad_account_id, Entity.AdAccount)

        fields = get_default_fields(ad_account.__class__)

        # Read just the fields we need
        ad_account_with_selected_fields = ad_account.api_get(fields=fields)
        # Export the object to a dict
        ad_account_data_dict = ad_account_with_selected_fields.export_all_data()

        token_manager.report_usage(token)

        job_scope_base = {
            # Duplicate the job_scope data to avoid mutating it
            **job_scope.to_dict(),
            'entity_type': Entity.AdAccount,
            'report_variant': None,
        }

        # Augment the data returned from the remote API with our vendor data
        augmented_ad_account_data = add_vendor_data(
            ad_account_data_dict,
            id=generate_universal_id(**job_scope_base),
        )
        feedback_entity_task.delay(ad_account_data_dict, job_scope.report_variant)

        store = NormalStore(job_scope)
        store.store(augmented_ad_account_data)

        # TODO: feedback account? this probably wouldn't make sense at the moment
        # because ad accounts are discovered from console and their lifecycle
        # is controlled from there.

        return ad_account_data_dict
def iter_collect_entities_per_page(job_scope: JobScope) -> Generator[Dict[str, Any], None, None]:
    """
    Collects an arbitrary entity for a page
    """
    token, entity_type, root_fb_entity = _extract_token_entity_type_parent_entity(
        job_scope, [Entity.PagePost, Entity.PageVideo], Entity.Page, 'ad_account_id'
    )

    entities = iter_native_entities_per_page(root_fb_entity, entity_type)

    record_id_base_data = job_scope.to_dict()
    record_id_base_data.update(entity_type=entity_type, report_variant=None)

    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with ChunkDumpStore(job_scope, chunk_size=DEFAULT_CHUNK_SIZE) as store, ChunkDumpStore(
        job_scope,
        chunk_size=DEFAULT_CHUNK_SIZE,
        bucket_type=ColdStoreBucketType.RAW_BUCKET,
        custom_namespace=NAMESPACE_RAW,
    ) as raw_store:
        cnt = 0
        for entity in entities:
            entity_data = entity.export_all_data()
            entity_data = add_vendor_data(
                entity_data,
                id=generate_universal_id(entity_id=entity_data.get('id'), **record_id_base_data),
            )
            entity_data['page_id'] = job_scope.ad_account_id

            if entity_type == Entity.PagePost:
                # store raw version of response (just to remain consistent)
                raw_store(entity_data)
                entity_data = _augment_page_post(entity_data)

            # Store the individual datum; the cold store divines whatever
            # it needs from the job context.
            store(entity_data)

            # Signal the new entity to the rest of the system
            feedback_entity_task.delay(entity_data, entity_type)

            yield entity_data

            cnt += 1
            if cnt % 1000 == 0:
                # Default paging size for entities per parent is typically
                # around 200, so every 1000 results means about 5 hits to FB.
                token_manager.report_usage(token, 5)

    token_manager.report_usage(token)
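
# For reference, a minimal sketch of what _extract_token_entity_type_parent_entity
# is assumed to do, based purely on how it is called above and below. It is
# defined elsewhere in this codebase; this is not its actual implementation:
#
#   def _extract_token_entity_type_parent_entity(job_scope, allowed_variants,
#                                                parent_entity_type, parent_id_attr):
#       if job_scope.report_variant not in allowed_variants:
#           raise ValueError(...)
#       token = job_scope.token
#       with PlatformApiContext(token) as fb_ctx:
#           root_fb_entity = fb_ctx.to_fb_model(
#               getattr(job_scope, parent_id_attr), parent_entity_type)
#       return token, job_scope.report_variant, root_fb_entity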
def sync_expectations(job_scope: JobScope):
    assert (
        job_scope.report_type == ReportType.sync_expectations
    ), 'Only sync_expectations report type is processed by this task'

    if job_scope.ad_account_id:
        # this is a per-ad-account task. No need to iterate over all of them.
        ad_account_ids_iter = [job_scope.ad_account_id]
    else:
        ad_account_ids_iter = expecations_store.iter_expectations_ad_accounts(sweep_id=job_scope.sweep_id)

    for ad_account_id in ad_account_ids_iter:
        ad_account_scoped_job_scope = JobScope(
            job_scope.to_dict(),
            ad_account_id=ad_account_id,
            entity_type=Entity.AdAccount,
            entity_id=ad_account_id,
        )

        with ChunkDumpStore(ad_account_scoped_job_scope, chunk_size=200) as store:
            job_ids_iter = expecations_store.iter_expectations_per_ad_account(
                ad_account_id, ad_account_scoped_job_scope.sweep_id
            )

            for job_id in job_ids_iter:
                job_id_parts = parse_id_parts(job_id)

                # Default is the platform namespace, and we communicate out only those
                if job_id_parts.namespace == JobScope.namespace:
                    store({
                        'job_id': job_id,
                        # 'status': 'expected',
                        'account_id': job_id_parts.ad_account_id,
                        'entity_type': job_id_parts.entity_type,
                        'entity_id': job_id_parts.entity_id,
                        'report_type': job_id_parts.report_type,
                        'report_variant': job_id_parts.report_variant,
                        'range_start': _to_date_string_if_set(job_id_parts.range_start),
                        'range_end': _to_date_string_if_set(job_id_parts.range_end),
                        'platform_namespace': job_id_parts.namespace,
                    })
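
# Illustrative only: generate_id and parse_id_parts are assumed to be inverses
# over the universal job ID format (see the test at the bottom of this section
# for generate_id usage); the literal values here are made up:
#
#   job_id = generate_id(
#       ad_account_id='123',
#       report_type=ReportType.lifetime,
#       report_variant=Entity.Campaign,
#   )
#   parts = parse_id_parts(job_id)
#   assert parts.ad_account_id == '123'
#   assert parts.report_variant == Entity.Campaign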
def iter_collect_entities_per_page_graph(job_scope: JobScope) -> Generator[Dict[str, Any], None, None]:
    """
    Collects an arbitrary entity for a page using the Graph API
    """
    page_token_manager = PageTokenManager.from_job_scope(job_scope)
    with PlatformApiContext(page_token_manager.get_best_token(job_scope.ad_account_id)) as fb_ctx:
        page_root_fb_entity = fb_ctx.to_fb_model(job_scope.ad_account_id, Entity.Page)

        entity_type = job_scope.report_variant

        # Page size is reduced to avoid the error:
        # "Please reduce the amount of data you're asking for, then retry your request"
        entities = iter_native_entities_per_page_graph(page_root_fb_entity, entity_type, page_size=30)

        record_id_base_data = job_scope.to_dict()
        record_id_base_data.update(entity_type=entity_type, report_variant=None)

        with ChunkDumpStore(job_scope, chunk_size=DEFAULT_CHUNK_SIZE) as store, ChunkDumpStore(
            job_scope,
            chunk_size=DEFAULT_CHUNK_SIZE,
            bucket_type=ColdStoreBucketType.RAW_BUCKET,
            custom_namespace=NAMESPACE_RAW,
        ) as raw_store:
            for entity in entities:
                entity_data = entity.export_all_data()
                entity_data = add_vendor_data(
                    entity_data,
                    id=generate_universal_id(entity_id=entity_data.get('id'), **record_id_base_data),
                )
                entity_data['page_id'] = job_scope.ad_account_id

                if entity_type == Entity.PagePostPromotable:
                    # store raw version of response (just to remain consistent)
                    raw_store(entity_data)
                    entity_data = _augment_page_post(entity_data)

                # Store the individual datum; the cold store divines whatever
                # it needs from the job context.
                store(entity_data)

                # Signal the new entity to the rest of the system
                feedback_entity_task.delay(entity_data, entity_type)

                yield entity_data
def iter_collect_entities_per_adaccount(job_scope: JobScope) -> Generator[Dict[str, Any], None, None]:
    """
    Collects an arbitrary entity for an ad account
    """
    token, entity_type, root_fb_entity = _extract_token_entity_type_parent_entity(
        job_scope, Entity.AA_SCOPED, Entity.AdAccount, 'ad_account_id'
    )

    entities = iter_native_entities_per_adaccount(root_fb_entity, entity_type)

    record_id_base_data = job_scope.to_dict()
    record_id_base_data.update(entity_type=entity_type, report_variant=None)

    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with ChunkDumpStore(job_scope, chunk_size=DEFAULT_CHUNK_SIZE) as store:
        for cnt, entity in enumerate(entities):
            entity_data = entity.export_all_data()
            entity_data = add_vendor_data(
                entity_data,
                id=generate_universal_id(
                    # FIXME: report a bug to facebook ads: get_ad_videos() returns
                    # AbstractCrudObject instances rather than ad videos, so
                    # entity.Field.id is inaccessible there (only a problem for ad videos)
                    entity_id=entity_data.get('id'),
                    **record_id_base_data,
                ),
            )

            # Store the individual datum; the cold store divines whatever
            # it needs from the job context.
            store(entity_data)

            # Signal the new entity to the rest of the system
            feedback_entity_task.delay(entity_data, entity_type)

            yield entity_data

            if cnt % 1000 == 0:
                # Default paging size for entities per parent is typically
                # around 200, so every 1000 results means about 5 hits to FB.
                token_manager.report_usage(token, 5)

    # Report the remaining token usage at the end of the iteration
    token_manager.report_usage(token)
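
# Illustrative consumption sketch (assumption: called from a task wrapper
# that assembles the JobScope and handles errors and status reporting;
# `logger` is hypothetical):
#
#   for datum in iter_collect_entities_per_adaccount(job_scope):
#       logger.debug('collected entity %s', datum.get('id'))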
def __init__(self, job_scope: JobScope, bucket_type: str = ColdStoreBucketType.ORIGINAL_BUCKET):
    super().__init__(job_scope)

    normative_entity_type = job_scope.report_variant
    assert normative_entity_type in Entity.ALL

    self.job_scope_base_data = job_scope.to_dict()
    # Since we are converting a per-parent job signature into per-child
    # ones, report_variant cannot stay set.
    self.job_scope_base_data.update(
        entity_type=normative_entity_type,
        # keeps the scope from being counted as a done task by the looper
        is_derivative=True,
        report_variant=None,
    )
    self.id_attribute_name = {
        Entity.AdAccount: AdsInsights.Field.account_id,
        Entity.Campaign: AdsInsights.Field.campaign_id,
        Entity.AdSet: AdsInsights.Field.adset_id,
        Entity.Ad: AdsInsights.Field.ad_id,
    }[normative_entity_type]
    self.bucket_type = bucket_type
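
# Illustrative only: id_attribute_name selected above is presumably used when
# fanning a per-parent insights payload out into per-child records, roughly
# like this (hypothetical code; `datum` is an exported insights row):
#
#   child_entity_id = datum[self.id_attribute_name]
#   universal_id = generate_universal_id(
#       entity_id=child_entity_id, **self.job_scope_base_data)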
def test_persister_saves_job_scope_auxiliary_data_to_data_flower(self):
    # There is a need to save some context data that does not fit on the job ID.
    # Persister should store that on the Data Flower.

    sweep_id = random.gen_string_id()
    entity_id = random.gen_string_id()
    ad_account_id = random.gen_string_id()

    job_id = generate_id(
        ad_account_id=ad_account_id,
        report_type=ReportType.lifetime,
        report_variant=Entity.Campaign,
    )

    prioritized_iter = [
        PrioritizationClaim(
            entity_id,
            Entity.Campaign,
            ReportType.lifetime,
            JobSignature(job_id),
            100,
            ad_account_id=ad_account_id,
            timezone='Europe/London',
            # TODO: contemplate moving auxiliary data formation to the
            # place where JobSignatures are generated and use that
            # data for Data Flower (as it was originally intended
            # but not implemented because saving each job's data
            # individually to Data Flower was too slow)
        )
    ]

    persisted = persister.iter_persist_prioritized(sweep_id, prioritized_iter)
    cnt = 0
    for item in persisted:
        # just need to spin the generator
        # so it does all the saving it needs to do per item
        cnt += 1
    assert cnt == 1

    # Now, finally, the testing:

    jobs_queued_actual = []
    with SortedJobsQueue(sweep_id).JobsReader() as jobs_iter:
        for job_id, job_scope_data, score in jobs_iter:
            jobs_queued_actual.append((job_id, job_scope_data, score))

    jobs_queued_should_be = [
        (
            job_id,
            # Contents of this dict is what we are testing here
            dict(
                # comes from Persister code:
                # manually peeled off *Claim and injected into Data Flower
                ad_account_timezone_name='Europe/London'
            ),
            100,
        )
    ]

    assert jobs_queued_actual == jobs_queued_should_be

    # And, another way of looking at it:
    # looper.iter_tasks preassembles JobScope and should apply aux data to it.

    job_scope = None
    cnt = 0
    for celery_task, job_scope, job_context, score in TaskProducer(sweep_id).iter_tasks():
        # this just needs to spin once
        cnt += 1
    assert cnt == 1

    job_scope_should_be = JobScope(
        sweep_id=sweep_id,
        namespace='fb',
        ad_account_id=ad_account_id,
        report_type=ReportType.lifetime,
        report_variant=Entity.Campaign,
        # \/ This is what we are testing \/
        # comes from Persister code:
        # manually peeled off *Claim and injected into Data Flower
        ad_account_timezone_name='Europe/London',
        score=100,
    )

    assert job_scope.to_dict() == job_scope_should_be.to_dict()