def test_sharding_does_not_break_and_does_not_duplicate(self):
        """Jobs spread over internal shards must be read back exactly once each."""
        # Write more than some multiple larger than the number of internal
        # shards (currently 10; 20x the reader batch size is comfortably more).
        total_jobs = SortedJobsQueue._JOBS_READER_BATCH_SIZE * 20
        aux_data = {'timezone': 'Some/Thing'}

        scored_job_ids = []
        for score in range(total_jobs):
            scored_job_ids.append((
                generate_id(
                    ad_account_id='AAID',
                    entity_id=str(score),
                    report_type=ReportType.entity,
                    report_variant=Entity.Campaign,
                ),
                score,
            ))

        with SortedJobsQueue(self.sweep_id).JobsWriter() as add_to_queue:
            for job_id, score in scored_job_ids:
                # writes tasks to distributed sorting queues
                add_to_queue(job_id, score, **aux_data)

        seen = 0
        with SortedJobsQueue(self.sweep_id).JobsReader() as jobs_iter:
            for job_id, job_scope_data, score in jobs_iter:
                assert job_id is not None
                assert job_scope_data is not None
                assert score is not None
                seen += 1

        assert seen == total_jobs
示例#2
0
    def test_task_does_not_blow_up(self):
        """Smoke test: sync_expectations runs end-to-end without raising.

        This intentionally overlaps with the payload-checking test nearby:
        there we intercept the "store" call and inspect the payload, here we
        actually let the call run. Keep both — if the call signature changes
        and our tests don't, signature-only tests check for the wrong thing.
        Don't remove me. Not duplicate.
        """
        job_id = generate_id(
            ad_account_id=random.gen_string_id(),
            report_type=ReportType.day_hour,
            report_variant=Entity.Ad,
            range_start='2000-01-01',
        )

        scope = JobScope(
            sweep_id=random.gen_string_id(),
            ad_account_id=random.gen_string_id(),
            report_type=ReportType.sync_expectations,
        )

        with mock.patch.object(expecations_store,
                               'iter_expectations_per_ad_account',
                               return_value=[job_id]):
            sync_expectations_task.sync_expectations(scope)
def test_aa_collection_expectation():
    """Exactly one AdAccount-entity claim is produced for an AA reality claim."""
    ad_account_id = gen_string_id()

    reality_claim = RealityClaim(
        ad_account_id=ad_account_id,
        entity_id=ad_account_id,
        entity_type=Entity.AdAccount,
    )

    def _is_aa_entity_job(claim):
        parts = parse_id_parts(claim.job_id)
        return (parts.report_type == ReportType.entity
                and parts.report_variant == Entity.AdAccount)

    matches = [
        claim for claim in iter_expectations([reality_claim])
        if _is_aa_entity_job(claim)
    ]

    assert len(matches) == 1
    claim = matches[0]

    assert claim.entity_id == reality_claim.ad_account_id
    assert claim.entity_type == reality_claim.entity_type

    assert claim.job_id == generate_id(
        ad_account_id=ad_account_id,
        entity_id=ad_account_id,
        report_type=ReportType.entity,
        report_variant=Entity.AdAccount,
    )
示例#4
0
def lifetime_metrics_per_entity_under_ad_account(
        entity_type: str, reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """Generate ad-account level expectation claims for lifetime.

    Yields a single AA-level claim for ``entity_type``. While walking the
    per-AA reality, a hierarchy of child entities is assembled so the claim
    can later be divided into smaller per-entity jobs — but only when every
    child has its full chain of parent ids set.
    """
    # No known AA timezone -> nothing is scheduled at all.
    if not reality_claim.timezone:
        return

    # TODO: Remove once all entities have parent ids
    # Divide tasks only if parent levels are defined for all ads
    is_dividing_possible = True

    # Root of the hierarchy is the ad account itself; children are attached
    # along their parent-id paths as reality claims are iterated.
    root_node = EntityNode(reality_claim.entity_id, reality_claim.entity_type)
    for child_claim in iter_reality_per_ad_account_claim(
            reality_claim, entity_types=[entity_type]):
        is_dividing_possible = is_dividing_possible and child_claim.all_parent_ids_set
        new_node = EntityNode(child_claim.entity_id, child_claim.entity_type)
        root_node.add_node(new_node, path=child_claim.parent_entity_ids)

    logger.warning(
        f'[dividing-possible] Ad Account {reality_claim.ad_account_id} Dividing possible: {is_dividing_possible}'
    )

    # Hierarchy is attached only when dividing is possible; a claim without
    # a hierarchy is treated as indivisible downstream.
    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.lifetime,
        entity_type,
        JobSignature(
            generate_id(ad_account_id=reality_claim.ad_account_id,
                        report_type=ReportType.lifetime,
                        report_variant=entity_type)),
        ad_account_id=reality_claim.ad_account_id,
        entity_hierarchy=root_node if is_dividing_possible else None,
        timezone=reality_claim.timezone,
    )
示例#5
0
    def test_task_is_called_with_right_data(self):
        """Intercepts the cold-storage "store" call and checks its exact payload.

        Companion to the smoke test above: that one lets the call run, this
        one pins the call signature and the stored dict contents.
        """
        range_start = now()
        range_start_should_be = range_start.strftime('%Y-%m-%d')

        expected_job_id = generate_id(
            ad_account_id=random.gen_string_id(),
            report_type=ReportType.day_hour,
            report_variant=Entity.Ad,
            range_start=range_start,
        )
        rr = [expected_job_id]
        expected_job_id_parts = parse_id_parts(expected_job_id)

        sync_expectations_job_scope = JobScope(
            sweep_id=random.gen_string_id(),
            ad_account_id=random.gen_string_id(),
            report_type=ReportType.sync_expectations,
        )

        # Patch both the expectations iterator (to feed our single job id in)
        # and the cold-storage writer (to capture what would be persisted).
        with mock.patch.object(expecations_store,
                               'iter_expectations_per_ad_account',
                               return_value=rr) as jid_iter, mock.patch.object(
                                   cold_storage.ChunkDumpStore,
                                   'store') as store:

            sync_expectations_task.sync_expectations(
                sync_expectations_job_scope)

        # Iterator must be called positionally with (ad_account_id, sweep_id).
        assert jid_iter.called
        aa, kk = jid_iter.call_args
        assert not kk
        assert aa == (sync_expectations_job_scope.ad_account_id,
                      sync_expectations_job_scope.sweep_id)

        # Store must be called with a single positional payload dict.
        assert store.called
        aa, kk = store.call_args
        assert not kk
        assert len(aa) == 1

        data = aa[0]

        assert data == {
            'job_id': expected_job_id,
            # missing "ad_" is intentional.
            # this matches this attr name as sent by FB
            # and used by us elsewhere in the company
            'account_id': expected_job_id_parts.ad_account_id,
            'entity_type': expected_job_id_parts.entity_type,
            'entity_id': expected_job_id_parts.entity_id,
            'report_type': expected_job_id_parts.report_type,
            'report_variant': expected_job_id_parts.report_variant,
            'range_start':
            range_start_should_be,  # checking manually to ensure it's properly stringified
            'range_end': None,
            'platform_namespace': JobScope.namespace,  # default platform value
        }
 def job_id(self) -> str:
     """Build the canonical job id string from this object's own fields.

     Delegates entirely to ``generate_id``; every part of the id comes
     straight off the instance, so the id is a pure function of its state.
     """
     return generate_id(
         ad_account_id=self.ad_account_id,
         entity_type=self.entity_type,
         entity_id=self.entity_id,
         report_type=self.report_type,
         report_variant=self.report_variant,
         range_start=self.range_start,
         range_end=self.range_end,
         namespace=self.namespace,
     )
    def test_storage_works(self):
        """Queued jobs come back sorted by score descending, with aux data echoed."""
        tz_data = {'timezone': 'Europe/London'}

        def _entity_job(variant):
            # All jobs share the account and report type; only variant differs.
            return generate_id(ad_account_id='AAID',
                               report_type=ReportType.entity,
                               report_variant=variant)

        # note the order of insertion - not in order of the score
        scored_jobs = [
            (_entity_job(Entity.Campaign), 20),
            (_entity_job(Entity.AdSet), 30),
            (_entity_job(Entity.Ad), 10),
        ]

        with SortedJobsQueue(self.sweep_id).JobsWriter() as add_to_queue:
            for job_id, score in scored_jobs:
                # writes tasks to distributed sorting queues
                add_to_queue(job_id, score, **tz_data)

        with SortedJobsQueue(self.sweep_id).JobsReader() as jobs_iter:
            jobs_queued_actual = [
                (job_id, job_scope_data, score)
                for job_id, job_scope_data, score in jobs_iter
            ]

        # Highest score first; scores are read back as floats.
        jobs_queued_should_be = [
            (_entity_job(Entity.AdSet), {'timezone': 'Europe/London'}, 30.0),
            (_entity_job(Entity.Campaign), {'timezone': 'Europe/London'}, 20.0),
            (_entity_job(Entity.Ad), {'timezone': 'Europe/London'}, 10.0),
        ]

        assert jobs_queued_actual == jobs_queued_should_be
示例#8
0
def page_entity(
        reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """Yield the "fetch Page entity metadata" expectation for a Page reality claim."""
    assert reality_claim.entity_type == Entity.Page, 'Page expectation should be triggered only by page reality claims'

    job_id = generate_id(
        ad_account_id=reality_claim.ad_account_id,
        entity_id=reality_claim.entity_id,
        report_type=ReportType.entity,
        report_variant=Entity.Page,
    )
    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.entity,
        Entity.Page,
        JobSignature(job_id),
        ad_account_id=reality_claim.ad_account_id,
    )
示例#9
0
    def test_some_data_at_end(self):
        """Trailing empty fields are trimmed: id stops at the last populated part."""
        ad_account_id = random.gen_string_id()
        report_type = 'blah'

        # Everything after report_type (report variant, range start/end) is
        # empty and must be dropped from the generated id entirely; the empty
        # entity type/id slots in the middle are kept as placeholders.
        id_should_be = D.join([
            NS,
            ad_account_id,
            '',  # entity Type
            '',  # entity ID
            report_type,
        ])

        actual = id_tools.generate_id(ad_account_id=ad_account_id,
                                      report_type=report_type)
        assert id_should_be == actual
示例#10
0
def entities_per_page(
        entity_type: str, reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """
    Generates "fetch EntityType entities metadata per given Page" job call sig
    """
    assert entity_type in Entity.NON_AA_SCOPED

    signature = JobSignature(
        generate_id(ad_account_id=reality_claim.ad_account_id,
                    report_type=ReportType.entity,
                    report_variant=entity_type))
    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.entity,
        entity_type,
        signature,
        ad_account_id=reality_claim.ad_account_id,
    )
def test_aa_import_expectation_generated():
    """A Scope reality claim yields the AdAccount-import expectation first."""
    entity_id = gen_string_id()

    scope_claim = RealityClaim(entity_type=Entity.Scope, entity_id=entity_id)
    claims = list(iter_expectations([scope_claim]))

    assert len(claims) == 2
    first_claim = claims[0]

    assert first_claim.entity_id == scope_claim.entity_id
    assert first_claim.entity_type == scope_claim.entity_type

    expected_job_id = generate_id(
        namespace=config.application.UNIVERSAL_ID_SYSTEM_NAMESPACE,
        entity_type=Entity.Scope,
        entity_id=entity_id,
        report_type=ReportType.import_accounts,
        report_variant=Entity.AdAccount,
    )
    assert first_claim.job_id == expected_job_id
示例#12
0
def lifetime_page_metrics_per_entity(
        entity_type: str, reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """Yield a lifetime-metrics expectation claim for one page-scoped entity."""
    assert entity_type in Entity.ALL

    signature = JobSignature(
        generate_id(
            ad_account_id=reality_claim.ad_account_id,
            entity_type=reality_claim.entity_type,
            entity_id=reality_claim.entity_id,
            report_type=ReportType.lifetime,
            report_variant=entity_type,
        ))
    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.lifetime,
        entity_type,
        signature,
        ad_account_id=reality_claim.ad_account_id,
    )
示例#13
0
def generate_child_claims(
        claim: ExpectationClaim) -> Generator[ExpectationClaim, None, None]:
    """Split a claim into one claim per direct child in its entity hierarchy."""
    for child in claim.entity_hierarchy.children:
        # Child inherits report type/variant, range and timezone from the
        # parent claim; only the entity identity (and sub-hierarchy) changes.
        child_job_id = generate_id(
            ad_account_id=claim.ad_account_id,
            range_start=claim.range_start,
            report_type=claim.report_type,
            report_variant=claim.report_variant,
            entity_id=child.entity_id,
            entity_type=child.entity_type,
        )
        yield ExpectationClaim(
            child.entity_id,
            child.entity_type,
            claim.report_type,
            claim.report_variant,
            JobSignature(child_job_id),
            ad_account_id=claim.ad_account_id,
            timezone=claim.timezone,
            entity_hierarchy=child,
            range_start=claim.range_start,
        )
示例#14
0
def ad_account_entity(
        reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """Yield the "fetch AdAccount entity metadata" expectation for an AA claim."""
    assert (
        reality_claim.entity_type == Entity.AdAccount
    ), 'Ad account expectation should be triggered only by ad account reality claims'

    signature = JobSignature(
        generate_id(
            ad_account_id=reality_claim.ad_account_id,
            entity_id=reality_claim.entity_id,
            report_type=ReportType.entity,
            report_variant=Entity.AdAccount,
        ))
    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.entity,
        Entity.AdAccount,
        signature,
        ad_account_id=reality_claim.ad_account_id,
        timezone=reality_claim.timezone,
    )
示例#15
0
def entities_per_ad_account(
        entity_type: str, reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """
    Generates "fetch EntityType entities metadata per given AA" job call sig
    """
    # Mental Note:
    # This generator is parked *under the AdAccount signature generators
    # inventory*, NOT under per-EntityType inventory (where it would fire
    # once per EntityType). Unlike metrics report types, we don't have an
    # effective "fetch single EntityType entity data per EntityType ID"
    # task (yet), so instead of many per-EntityType job signatures we emit
    # exactly one per-parent-AA signature and make it the
    # "normative_job_signature" at the AA level.
    # Once an atomic per-EntityType collection celery task exists, the
    # atomic signature goes into the normative column per EntityType and
    # per-parent-AA, the per-parent-AA signature moves into the
    # "effective_job_signatures" list on those claims, AND this function
    # must move from the AA-level to the EntityType-level signature
    # generators inventory.

    assert entity_type in Entity.ALL

    job_signature = JobSignature(
        generate_id(ad_account_id=reality_claim.ad_account_id,
                    report_type=ReportType.entity,
                    report_variant=entity_type))
    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.entity,
        entity_type,
        job_signature,
        ad_account_id=reality_claim.ad_account_id,
        timezone=reality_claim.timezone,
    )
示例#16
0
def pages_per_scope(
        reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """
    Generates "fetch Pages active entity metadata per given scope" job ID

    To be used by Scope-level RealityClaim / ExpectationClaim.
    """
    # Note the absence of a value for Page in the generated id:
    # this is the "all Pages per scope X" job.
    job_id = generate_id(
        namespace=config.application.UNIVERSAL_ID_SYSTEM_NAMESPACE,
        entity_id=reality_claim.entity_id,
        entity_type=reality_claim.entity_type,
        report_type=ReportType.import_pages,
        report_variant=Entity.Page,
    )
    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.import_pages,
        Entity.Page,
        JobSignature(job_id),
    )
示例#17
0
    def test_some_data_trailing(self):
        """trailing_parts append after all standard fields; None becomes ''."""
        ad_account_id = random.gen_string_id()
        report_type = 'blah'

        # With non-empty trailing parts present, the empty standard fields
        # after report_type can no longer be trimmed and must stay as
        # placeholders so positions remain parseable.
        expected = D.join([
            NS,
            ad_account_id,
            '',  # entity Type
            '',  # entity ID
            report_type,
            '',  # report variant
            '',  # Range start
            '',  # Range end
            'hocus',
            '',  # None
            'pocus',
        ])

        actual = id_tools.generate_id(
            ad_account_id=ad_account_id,
            report_type=report_type,
            trailing_parts=['hocus', None, 'pocus'])
        assert expected == actual
    def test_persister_saves_job_scope_auxiliary_data_to_data_flower(self):
        """Persister must stash aux claim data (timezone) in the Data Flower,
        and the looper must apply it back onto the assembled JobScope.
        """
        # There is a need to save some context data that does not fit on JobIS
        # Persister should store that on the Data Flower.

        sweep_id = random.gen_string_id()
        entity_id = random.gen_string_id()
        ad_account_id = random.gen_string_id()

        job_id = generate_id(ad_account_id=ad_account_id,
                             report_type=ReportType.lifetime,
                             report_variant=Entity.Campaign)

        prioritized_iter = [
            PrioritizationClaim(
                entity_id,
                Entity.Campaign,
                ReportType.lifetime,
                JobSignature(job_id),
                100,
                ad_account_id=ad_account_id,
                timezone='Europe/London',
                # TODO: contemplate moving auxiliary data formation to
                #       place where JobSignatures are generated and use that
                #       data for Data Flower (as it was originally intended
                #       but not implemented because saving each job's data
                #       individually to Data Flower was too slow)
            )
        ]

        persisted = persister.iter_persist_prioritized(sweep_id,
                                                       prioritized_iter)
        cnt = 0
        for item in persisted:
            cnt += 1
            # just need to spin the generator
            # so it does all the saving it needs to do per item
        assert cnt == 1

        # Now, finally, the testing:

        # Phase 1: the raw queue must carry the aux dict next to the job id.
        jobs_queued_actual = []
        with SortedJobsQueue(sweep_id).JobsReader() as jobs_iter:
            for job_id, job_scope_data, score in jobs_iter:
                jobs_queued_actual.append((job_id, job_scope_data, score))

        jobs_queued_should_be = [(
            job_id,
            # Contents of this dict is what we are testing here
            dict(
                # comes from Persister code
                # manually peeled off *Claim and injected into Data Flower
                ad_account_timezone_name='Europe/London'),
            100,
        )]

        assert jobs_queued_actual == jobs_queued_should_be

        # And, another way of looking at it
        # looper.iter_tasks preassembles JobScope and should apply aux data to it.

        # Phase 2: spin the task producer once and inspect the JobScope it built.
        job_scope = None
        cnt = 0
        for celery_task, job_scope, job_context, score in TaskProducer(
                sweep_id).iter_tasks():
            cnt += 1
            # this just needs to spin once
        assert cnt == 1

        job_scope_should_be = JobScope(
            sweep_id=sweep_id,
            namespace='fb',
            ad_account_id=ad_account_id,
            report_type=ReportType.lifetime,
            report_variant=Entity.Campaign,
            # \/ This is what we are testing \/
            # comes from Persister code
            # manually peeled off *Claim and injected into Data Flower
            ad_account_timezone_name='Europe/London',
            score=100,
        )

        assert job_scope.to_dict() == job_scope_should_be.to_dict()
示例#19
0
 def test_it_works_with_nulls(self):
     """With no arguments at all, the generated id is just the namespace."""
     generated = id_tools.generate_id()
     assert NS == generated