Пример #1
0
def test_searches_by_country(generate_data):
    """Check that daily rollups keep search counts separated by country.

    Three clients are generated: two in the US (one per engine) and one in
    CA, each reporting a count of 2. The rollup is expected to contain three
    rows, with 4 searches attributed to the US and 2 to CA.
    """
    # (client_id, country, engine) for each generated snippet.
    fixtures = (
        ("profile_0", "US", "hooli"),
        ("profile_1", "US", "altavista"),
        ("profile_2", "CA", "altavista"),
    )
    snippets = [
        {
            'client_id': client_id,
            'country': country,
            'search_counts': [search_row(engine=engine, count=2)],
        }
        for client_id, country, engine in fixtures
    ]
    rollup = search_rollups.transform(generate_data(snippets), "daily")

    def total_searches(frame, geo):
        # Sum search_count over the rows belonging to a single country.
        in_country = frame.where(F.col("country") == geo)
        return in_country.select(F.sum("search_count")).first()[0]

    assert rollup.count() == 3
    assert total_searches(rollup, "US") == 4
    assert total_searches(rollup, "CA") == 2
Пример #2
0
def test_multiple_clients_multiple_search_engines(generate_data):
    """Check rollup totals when two clients each use two search engines.

    profile_0 reports 18 hooli / 3 altavista searches and profile_1 reports
    the mirror image. The daily rollup is expected to contain two rows,
    42 searches overall, and 21 searches for hooli.
    """
    first_client = {
        'client_id': 'profile_0',
        'search_counts': [
            search_row(engine="hooli", count=18),
            search_row(engine="altavista", count=3),
        ],
    }
    second_client = {
        'client_id': 'profile_1',
        'search_counts': [
            search_row(engine="hooli", count=3),
            search_row(engine="altavista", count=18),
        ],
    }
    frame = generate_data([first_client, second_client])
    rollup = search_rollups.transform(frame, "daily")

    assert rollup.count() == 2

    overall = rollup.select(F.sum("search_count")).first()[0]
    assert overall == 42

    hooli_rows = rollup.where("search_provider='hooli'")
    hooli_total = hooli_rows.select(F.sum("search_count")).first()[0]
    assert hooli_total == 21
Пример #3
0
def test_single_client_shares_multiple_countries(generate_data):
    """Check that profile shares across countries add up to one.

    Two snippets are generated that differ only in country (US and CA);
    neither sets a client_id, so both presumably fall back to the fixture's
    default client -- confirm against generate_data. The daily rollup is
    expected to have one row per country, with profile_share summing to 1.
    """
    snippets = [
        {
            'country': 'US'
        },
        {
            'country': 'CA'
        },
    ]
    df = generate_data(snippets)
    result = search_rollups.transform(df, "daily")

    assert result.count() == 2

    total_share = result.select(F.sum("profile_share")).first()[0]
    # An aggregate over zero non-null values yields None; fail with a clear
    # assertion rather than a TypeError in the arithmetic below.
    assert total_share is not None
    # profile_share is fractional, so compare with a tolerance instead of
    # exact float equality: the summed value can differ from 1.0 in the last
    # bits depending on how the shares were computed and added.
    assert abs(total_share - 1.0) < 1e-9
Пример #4
0
def test_filter_incontent_searches(generate_data):
    """Check that searches with source "in-content" are left out of rollups.

    One snippet is generated per search source. Only the contextmenu and
    abouthome searches are expected to be counted, for a total of 2.
    """
    sources = [
        "in-content",   # no
        "contextmenu",  # yes
        "abouthome",    # yes
    ]
    snippets = [
        {'search_counts': [search_row(source=source)]}
        for source in sources
    ]
    rollup = search_rollups.transform(generate_data(snippets), "daily")

    # in-content search should be filtered
    counted = rollup.select(F.sum("search_count")).first()[0]
    assert counted == 2
Пример #5
0
def test_single_client_multiple_search_engines(generate_data):
    """Check rollups for snippets that mix several search engines.

    The first snippet reports 2 hooli and 4 altavista searches; the second
    reports one altavista row built with search_row's defaults. The daily
    rollup is expected to hold exactly one hooli row and 7 searches overall.
    """
    mixed_engines = {
        'search_counts': [
            search_row(engine="hooli", count=2),
            search_row(engine="altavista", count=4),
        ],
    }
    altavista_only = {'search_counts': [search_row("altavista")]}
    frame = generate_data([mixed_engines, altavista_only])
    rollup = search_rollups.transform(frame, "daily")

    hooli_rows = rollup.where("search_provider='hooli'")
    assert hooli_rows.count() == 1

    overall = rollup.select(F.sum("search_count")).first()[0]
    assert overall == 7
Пример #6
0
def test_null_row(generate_data):
    """Check the placeholder values the rollup emits for an all-null row.

    The generated snippet nulls out every field except client_id. The
    resulting non-US row is expected to carry the placeholder values listed
    in ``expectations`` below and a search count of zero.
    """
    # everything except client_id is null
    nulled_fields = (
        'country',
        'default_search_engine',
        'distribution_id',
        'locale',
        'search_counts',
    )
    snippets = [{field: None for field in nulled_fields}]
    rollup = search_rollups.transform(generate_data(snippets), "daily")

    row = rollup.where("country<>'US'").first()

    # (column, expected value), checked in this order.
    expectations = (
        ("country", "XX"),
        ("search_provider", "NO_SEARCHES"),
        ("default_provider", "NO_DEFAULT"),
        ("locale", "xx"),
        ("distribution_id", "MOZILLA"),
        ("search_count", 0),
    )
    for column, expected in expectations:
        assert getattr(row, column) == expected
Пример #7
0
def test_transform_excludes_profile_shares_for_monthly(generate_data):
    """Check that monthly rollups do not expose a profile_share column."""
    frame = generate_data(None)
    monthly = search_rollups.transform(frame, mode="monthly")

    assert "profile_share" not in monthly.columns