Пример #1
0
def test_filter_clusters_already_filtered(filterable_cluster):
    """Running `filter_clusters` a second time raises `AlreadyFiltered`."""
    # First run: filter the good cluster set up by the fixture.
    filter_clusters()

    # Second run: filtering again must be refused.
    with pytest.raises(AlreadyFiltered):
        filter_clusters()
Пример #2
0
def test_filter_clusters_kept(filterable_cluster):
    """A cluster reduced to a single quote (sid 0) is still kept."""
    # Filtering drops every quote of the cluster except #0; the cluster
    # itself survives.
    filter_clusters()

    with session_scope() as session:
        only_filtered = Cluster.filtered.is_(True)
        kept = session.query(Cluster).filter(only_filtered).one()

        # Exactly one quote remains, and it is the one with sid 0.
        assert kept.size == 1
        surviving_quote = kept.quotes.first()
        assert surviving_quote.sid == 0
Пример #3
0
def test_filter_clusters_emptied(filterable_cluster):
    """A cluster left with no acceptable quote is filtered out entirely."""
    # Make the cluster bad: overwrite the timestamp at index 1 of quote
    # sid=0 with a date 81 days from now.
    with session_scope() as session:
        target = session.query(Quote).filter(Quote.sid == 0).one()
        # Work on a copy and reassign the attribute, rather than mutating
        # the stored value in place.
        shifted = target.url_timestamps.copy()
        shifted[1] = datetime.utcnow() + timedelta(days=81)
        target.url_timestamps = shifted

    # No filtered cluster should exist after filtering.
    filter_clusters()
    with session_scope() as session:
        only_filtered = Cluster.filtered.is_(True)
        remaining = session.query(Cluster).filter(only_filtered).count()
        assert remaining == 0
Пример #4
0
def test_filter_clusters_too_long(filterable_cluster):
    """A cluster whose span is too long after quote filtering is dropped."""
    # Stretch the cluster by attaching one more, otherwise valid, quote.
    with session_scope() as session:
        target_cluster = session.query(Cluster).first()

        # Nothing is wrong with this quote by itself; it is simply too far
        # in time from quote sid=0, so the cluster span becomes too long.
        distant_quote = Quote(
            sid=5,
            string='a string with enough '
                   'words and no problems',
        )
        distant_url = Url(
            timestamp=datetime.utcnow() + timedelta(days=80, hours=1),
            frequency=2,
            url_type='M',
            url='some-url',
        )
        distant_quote.add_url(distant_url)
        target_cluster.quotes.append(distant_quote)

    # No filtered cluster should exist after filtering.
    filter_clusters()
    with session_scope() as session:
        only_filtered = Cluster.filtered.is_(True)
        remaining = session.query(Cluster).filter(only_filtered).count()
        assert remaining == 0
Пример #5
0
def filter_memetracker(limit):
    """Filter MemeTracker data, logging when the run starts and ends.

    `limit` is passed unchanged to `filter_clusters` as its `limit`
    keyword argument.
    """
    logger.info('Starting filtering of memetracker data')

    filter_clusters(limit=limit)

    logger.info('Done filtering memetracker data')