Пример #1
0
def test_system(get_dataframe):
    """
    Check that sampling with the "system" method runs for both a fixed
    sample size and a sample fraction.
    """
    # The system method sometimes returns no rows at all, so keep drawing
    # until a non-empty dataframe comes back.
    while True:
        sized = get_dataframe(
            UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
                size=20, sampling_method="system"
            )
        )
        if len(sized) != 0:
            break
    assert list(sized.columns) == ["subscriber"]
    assert len(sized) == 20

    # Same retry strategy for the fraction-based sample; only the column
    # layout is checked here, not the number of rows.
    while True:
        fractional = get_dataframe(
            UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
                fraction=0.25, sampling_method="system"
            )
        )
        if len(fractional) != 0:
            break
    assert list(fractional.columns) == ["subscriber"]
Пример #2
0
def test_gets_parent_attributes():
    """
    Test that a random sample exposes the attributes of the query it
    was drawn from.
    """
    parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04", hours=(4, 17))
    sampling_options = dict(size=10, sampling_method="bernoulli", estimate_count=False)
    sample = parent.random_sample(**sampling_options)
    # The hours given to the parent query must be readable on the sample.
    assert sample.hours == (4, 17)
Пример #3
0
def test_is_subclass():
    """
    Test that a random sample is an instance of the class it was sampled from.
    """
    parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
    sampling_options = dict(size=10, sampling_method="bernoulli", estimate_count=False)
    sample = parent.random_sample(**sampling_options)
    assert isinstance(sample, UniqueSubscribers)
Пример #4
0
def test_seeded_random_zero(sample_method):
    """
    Test that a seed of 0 gives the same query on repeated calls.
    """
    parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
    sample = parent.random_sample(size=10, sampling_method=sample_method, seed=0)
    # Ask the same sample object for its query twice and compare the results.
    first_query = sample.get_query()
    second_query = sample.get_query()
    assert first_query == second_query
Пример #5
0
def gets_parent_attributes():
    """
    Check that a random sample exposes the attributes of the query it
    was drawn from.
    """
    # NOTE(review): the name lacks the `test_` prefix, so pytest's default
    # discovery will not collect this function - confirm that is intended.
    parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04", level="admin3")
    sampling_options = dict(size=10, method="bernoulli", estimate_count=False)
    sample = parent.random_sample(**sampling_options)
    # The level given to the parent query must be readable on the sample.
    assert sample.level == "admin3"
Пример #6
0
def test_seeded_random(sample_method, get_dataframe):
    """
    Test that two samples drawn with the same seed contain the same rows.
    """

    def draw_seeded_sample():
        # Build a brand new query each time so both draws are independent objects.
        parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        return get_dataframe(
            parent.random_sample(size=10, sampling_method=sample_method, seed=0.1)
        )

    first = draw_seeded_sample()
    second = draw_seeded_sample()
    assert first.values.tolist() == second.values.tolist()
Пример #7
0
def test_system_rows(get_dataframe):
    """
    Check that the "system_rows" method returns the expected number of rows
    for a fixed size and for a fraction.
    """
    by_size = UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
        size=10, sampling_method="system_rows"
    )
    sized_frame = get_dataframe(by_size)
    assert len(sized_frame) == 10

    by_fraction = UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
        fraction=0.1, sampling_method="system_rows"
    )
    fraction_frame = get_dataframe(by_fraction)
    assert len(fraction_frame) == 50
Пример #8
0
def test_bernoulli(get_dataframe):
    """
    Check that the "bernoulli" method runs for a fixed size and for a fraction.
    """
    by_size = UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
        size=10, sampling_method="bernoulli"
    )
    sized_frame = get_dataframe(by_size)
    assert list(sized_frame.columns) == ["subscriber"]
    assert len(sized_frame) == 10

    by_fraction = UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
        fraction=0.1, sampling_method="bernoulli"
    )
    fraction_frame = get_dataframe(by_fraction)
    # Only the column layout is checked for the fraction-based sample.
    assert list(fraction_frame.columns) == ["subscriber"]
Пример #9
0
class test_unique_subscribers(TestCase):
    """Unit tests for the UniqueSubscribers query."""

    def setUp(self):
        """Create the UniqueSubscribers query shared by every test."""
        self.UU = UniqueSubscribers("2016-01-01", "2016-01-02")

    def test_returns_set(self):
        """
        as_set() returns an object whose type is exactly set.
        """
        subscribers = self.UU.as_set()
        self.assertIs(type(subscribers), set)

    def test_subscribers_unique(self):
        """
        The "subscriber" column of the dataframe holds no duplicates.
        """
        subscriber_column = self.UU.get_dataframe()["subscriber"]
        self.assertTrue(subscriber_column.is_unique)
Пример #10
0
def test_seeded_random_badmethod():
    """
    Test that passing a seed together with system_rows raises a ValueError.
    """
    with pytest.raises(ValueError):
        # Both construction and sampling stay inside the context manager.
        parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        parent.random_sample(10, method="system_rows", seed=-0.5)
Пример #11
0
def test_bad_must_provide_either_sample_size_or_fraction():
    """
    Passing a sample size and a fraction together should raise a ValueError.
    """
    with pytest.raises(ValueError):
        parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        parent.random_sample(10, fraction=0.5)
Пример #12
0
def test_bad_must_provide_sample_size_or_fraction():
    """
    Passing neither a sample size nor a fraction should raise a ValueError.
    """
    with pytest.raises(ValueError):
        parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        parent.random_sample(None, fraction=None)
Пример #13
0
def test_seeded_random_oob():
    """
    Test that a seed outside the range -1 to 1 raises a ValueError.
    """
    with pytest.raises(ValueError):
        parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        parent.random_sample(size=10, sampling_method="random_ids", seed=-50)
Пример #14
0
def test_bad_method_errors():
    """
    An unknown sampling method should raise a ValueError.
    """
    # NOTE(review): seed=-50 is passed as well; if that seed is itself
    # rejected with ValueError, this test cannot tell the two causes
    # apart - confirm.
    with pytest.raises(ValueError):
        parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        parent.random_sample(size=10, sampling_method="BAD_METHOD_TYPE", seed=-50)
Пример #15
0
def test_not_estimate_count(get_dataframe):
    """
    Check that sampling with estimate_count=False runs and returns the
    requested number of rows.
    """
    parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
    sample = parent.random_sample(
        size=10, sampling_method="bernoulli", estimate_count=False
    )
    frame = get_dataframe(sample)
    assert list(frame.columns) == ["subscriber"]
    assert len(frame) == 10
Пример #16
0
def test_seeded_random_badmethod():
    """
    Test that system_rows rejects a seed keyword with a TypeError.
    """
    expected_message = "got an unexpected keyword argument 'seed'"
    with pytest.raises(TypeError, match=expected_message):
        parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        parent.random_sample(size=10, sampling_method="system_rows", seed=-0.5)
Пример #17
0
def test_random_msisdn(get_dataframe):
    """
    Check that a sample drawn without naming a sampling method returns
    the requested number of subscribers.
    """
    parent = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
    frame = get_dataframe(parent.random_sample(size=10))
    assert list(frame.columns) == ["subscriber"]
    assert len(frame) == 10
Пример #18
0
    def test_subscribers_make_atleast_one_call_in_admin0(self):
        """
        The set of subscribers who make at least one call within admin0 over
        the whole test time period should be equal to the set of unique
        subscribers in the test calls table.
        """

        start, stop = "2016-01-01", "2016-01-07"

        sls = SubscriberLocationSubset(start, stop, min_calls=1, level="admin0")
        us = UniqueSubscribers(start, stop, table="events.calls")

        # Compare as sets so that row order and duplicates do not matter.
        sls_subs = set(sls.get_dataframe()["subscriber"])
        us_subs = set(us.get_dataframe()["subscriber"])

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(sls_subs, us_subs)
Пример #19
0
def test_pickling():
    """
    Test that random samples survive a pickle round trip with the same
    query and the same query id.
    """
    sampled_subscribers = UniqueSubscribers(
        start="2016-01-01", stop="2016-01-04"
    ).random_sample(size=10, sampling_method="system_rows")
    sampled_calls = Table("events.calls").random_sample(
        size=10, sampling_method="bernoulli", seed=0.73
    )
    for original in (sampled_subscribers, sampled_calls):
        # Each comparison uses its own freshly unpickled copy.
        expected_query = original.get_query()
        restored_query = pickle.loads(pickle.dumps(original)).get_query()
        assert expected_query == restored_query

        expected_id = original.query_id
        restored_id = pickle.loads(pickle.dumps(original)).query_id
        assert expected_id == restored_id
Пример #20
0
def test_unique_subscriber_column_names():
    """Test that column_names of UniqueSubscribers matches the columns of head(0)."""
    query = UniqueSubscribers("2016-01-01", "2016-01-02")
    actual_columns = query.head(0).columns.tolist()
    assert actual_columns == query.column_names
Пример #21
0
    def setUp(self):
        """Create the UniqueSubscribers query and store it on the instance as UU."""
        unique_subscribers = UniqueSubscribers("2016-01-01", "2016-01-02")
        self.UU = unique_subscribers