def testPurgeOldTweetsDeletedLessThanExpected(
        self,
        estimateNumTweetsToDeleteMock,
        queryCandidateRowsMock,
        deleteRowsMock):
    # Scenario: the estimate is three full batches, but the delete mock
    # reports a full batch, half a batch, and a full batch on successive
    # calls. purgeOldTweets is expected to return the sum of the reported
    # counts after one estimate call, four candidate queries and three
    # deletes.
    batchSize = purge_old_tweets._MAX_DELETE_BATCH_SIZE
    expectedTotal = batchSize * 3
    estimateNumTweetsToDeleteMock.return_value = expectedTotal

    # Finite supply of fake uids shared by all candidate queries
    candidateUids = iter(xrange(expectedTotal))

    def fakeQueryCandidates(limit, **kwargs):
        # Hand out at most `limit` of the not-yet-consumed fake uids
        return tuple(itertools.islice(candidateUids, limit))

    queryCandidateRowsMock.side_effect = fakeQueryCandidates

    reportedDeletes = [batchSize, batchSize // 2, batchSize]
    deleteRowsMock.side_effect = iter(reportedDeletes)

    # Execute
    purgedCount = purge_old_tweets.purgeOldTweets(thresholdDays=90)

    # Verify
    self.assertEqual(purgedCount, sum(reportedDeletes))
    self.assertEqual(estimateNumTweetsToDeleteMock.call_count, 1)
    self.assertEqual(queryCandidateRowsMock.call_count, 4)
    self.assertEqual(deleteRowsMock.call_count, 3)
def testPurgeOldTweetsStopAtEstimated(
        self,
        estimateNumTweetsToDeleteMock,
        queryCandidateRowsMock,
        deleteRowsMock):
    # Scenario: one more candidate uid is available than the estimated
    # number of tweets to delete (two full batches). The delete mock reports
    # every uid it is given as deleted. purgeOldTweets is expected to return
    # exactly the estimate and to leave the extra uid unconsumed.
    expectedTotal = purge_old_tweets._MAX_DELETE_BATCH_SIZE * 2
    estimateNumTweetsToDeleteMock.return_value = expectedTotal

    # Supply holds the estimated amount plus one surplus uid
    candidateUids = iter(xrange(expectedTotal + 1))

    def fakeQueryCandidates(limit, **kwargs):
        # Hand out at most `limit` of the not-yet-consumed fake uids
        return tuple(itertools.islice(candidateUids, limit))

    def fakeDeleteRows(uids, **kwargs):
        # Report every requested uid as deleted
        return len(uids)

    queryCandidateRowsMock.side_effect = fakeQueryCandidates
    deleteRowsMock.side_effect = fakeDeleteRows

    # Execute
    purgedCount = purge_old_tweets.purgeOldTweets(thresholdDays=90)

    # Verify
    self.assertEqual(purgedCount, expectedTotal)
    self.assertEqual(estimateNumTweetsToDeleteMock.call_count, 1)
    self.assertEqual(queryCandidateRowsMock.call_count, 2)
    self.assertEqual(deleteRowsMock.call_count, 2)

    # Make sure it didn't try to retrieve candidates beyond estimated number
    unconsumedUids = tuple(candidateUids)
    self.assertEqual(len(unconsumedUids), 1)
def testPurgeOldTweetsFewerCandidatesThanExpected(
        self,
        estimateNumTweetsToDeleteMock,
        queryCandidateRowsMock,
        deleteRowsMock):
    # Scenario: the estimate is two full batches, but only half that many
    # candidate uids are available. The delete mock reports every uid it is
    # given as deleted. purgeOldTweets is expected to return the number of
    # available candidates after two candidate queries and a single delete.
    expectedTotal = purge_old_tweets._MAX_DELETE_BATCH_SIZE * 2
    estimateNumTweetsToDeleteMock.return_value = expectedTotal

    availableCount = expectedTotal // 2
    candidateUids = iter(xrange(availableCount))

    def fakeQueryCandidates(limit, **kwargs):
        # Hand out at most `limit` of the not-yet-consumed fake uids
        return tuple(itertools.islice(candidateUids, limit))

    def fakeDeleteRows(uids, **kwargs):
        # Report every requested uid as deleted
        return len(uids)

    queryCandidateRowsMock.side_effect = fakeQueryCandidates
    deleteRowsMock.side_effect = fakeDeleteRows

    # Execute
    purgedCount = purge_old_tweets.purgeOldTweets(thresholdDays=90)

    # Verify
    self.assertEqual(purgedCount, expectedTotal // 2)
    self.assertEqual(estimateNumTweetsToDeleteMock.call_count, 1)
    self.assertEqual(queryCandidateRowsMock.call_count, 2)
    self.assertEqual(deleteRowsMock.call_count, 1)
def testPurgeOldTweets(self):
    # Inserts tweets on both sides of a 90-day threshold into a temporary
    # repository, runs purgeOldTweets, and checks that only the rows older
    # than the threshold were removed.
    # NOTE(review): another definition named testPurgeOldTweets is visible
    # next to this one; if both live in the same class the later one shadows
    # the earlier - confirm which is intended.
    thresholdDays = 90
    utcNow = datetime.utcnow()

    def tweetRow(createdAt):
        # Build one twitterTweets row with a fresh uid and the given timestamp
        return dict(uid=uuid.uuid1().hex,
                    created_at=createdAt,
                    retweet=False,
                    lang="en-us")

    expiredRows = [
        tweetRow(utcNow - timedelta(days=thresholdDays + 1)),
        tweetRow(utcNow - timedelta(days=thresholdDays + 2)),
    ]

    freshRows = [
        tweetRow(utcNow),
        tweetRow(utcNow - timedelta(days=thresholdDays - 1)),
        tweetRow(utcNow - timedelta(days=thresholdDays - 2)),
    ]

    everyRow = expiredRows + freshRows

    # Patch collectorsdb config to use a temporary database
    with collectorsdb_test_utils.ManagedTempRepository("purgetweets"):
        engine = collectorsdb.engineFactory()

        insertStatement = schema.twitterTweets.insert()  # pylint: disable=E1120
        insertResult = engine.execute(insertStatement, everyRow)
        insertedCount = insertResult.rowcount
        self.assertEqual(insertedCount, len(everyRow))

        # Execute
        purgedCount = purge_old_tweets.purgeOldTweets(thresholdDays)

        # Verify
        self.assertEqual(purgedCount, len(expiredRows))

        # Verify that only the old tweets got purged
        uidQuery = sql.select([schema.twitterTweets.c.uid])
        survivingRows = engine.execute(uidQuery).fetchall()
        self.assertEqual(len(survivingRows), len(freshRows))

        expectedUids = [row["uid"] for row in freshRows]
        survivingUids = [row.uid for row in survivingRows]  # pylint: disable=E1101
        self.assertItemsEqual(expectedUids, survivingUids)
def testPurgeOldTweets(self):
    # Populates a temporary repository with tweets created before and after
    # a 90-day cutoff, invokes purgeOldTweets, and verifies that the returned
    # count and the surviving uids match the expectation that only the rows
    # older than the cutoff are removed.
    # NOTE(review): another definition named testPurgeOldTweets is visible
    # next to this one; if both live in the same class the later one shadows
    # the earlier - confirm which is intended.
    cutoffDays = 90
    rightNow = datetime.utcnow()

    def rowsCreatedAt(timestamps):
        # One twitterTweets row (with a fresh uid) per timestamp, in order
        return [
            dict(uid=uuid.uuid1().hex, created_at=stamp, retweet=False, lang="en-us")
            for stamp in timestamps
        ]

    purgeableRows = rowsCreatedAt([
        rightNow - timedelta(days=cutoffDays + 1),
        rightNow - timedelta(days=cutoffDays + 2),
    ])

    retainedRows = rowsCreatedAt([
        rightNow,
        rightNow - timedelta(days=cutoffDays - 1),
        rightNow - timedelta(days=cutoffDays - 2),
    ])

    insertedRows = purgeableRows + retainedRows

    # Patch collectorsdb config to use a temporary database
    with collectorsdb_test_utils.ManagedTempRepository("purgetweets"):
        engine = collectorsdb.engineFactory()

        result = engine.execute(schema.twitterTweets.insert(), insertedRows)  # pylint: disable=E1120
        self.assertEqual(result.rowcount, len(insertedRows))

        # Execute
        numPurged = purge_old_tweets.purgeOldTweets(cutoffDays)

        # Verify
        self.assertEqual(numPurged, len(purgeableRows))

        # Verify that only the old tweets got purged
        selectUids = sql.select([schema.twitterTweets.c.uid])
        leftoverRows = engine.execute(selectUids).fetchall()
        self.assertEqual(len(leftoverRows), len(retainedRows))

        uidsExpected = [row["uid"] for row in retainedRows]
        uidsFound = [row.uid for row in leftoverRows]  # pylint: disable=E1101
        self.assertItemsEqual(uidsExpected, uidsFound)
def testPurgeOldTweetsWithoutOldRecords(
        self,
        estimateNumTweetsToDeleteMock,
        queryCandidateRowsMock,
        deleteRowsMock):
    # Scenario: the estimate mock reports zero tweets to delete.
    # purgeOldTweets is expected to return 0 after a single estimate call,
    # without querying for candidates or deleting anything.
    estimateNumTweetsToDeleteMock.return_value = 0

    # These should not be called in this test
    for unexpectedMock in (queryCandidateRowsMock, deleteRowsMock):
        unexpectedMock.side_effect = []

    # Execute
    purgedCount = purge_old_tweets.purgeOldTweets(thresholdDays=90)

    # Verify
    self.assertEqual(purgedCount, 0)

    expectedCallCounts = (
        (estimateNumTweetsToDeleteMock, 1),
        (queryCandidateRowsMock, 0),
        (deleteRowsMock, 0),
    )
    for mockObj, expectedCalls in expectedCallCounts:
        self.assertEqual(mockObj.call_count, expectedCalls)