def testPurgeOldTweetsDeletedLessThanExpected(
      self,
      estimateNumTweetsToDeleteMock,
      queryCandidateRowsMock,
      deleteRowsMock):
    """purgeOldTweets reports the rows actually deleted, not the estimate.

    The estimate is three full batches, but the second delete call reports
    only half a batch. The test asserts that the returned total equals the sum
    of the counts reported by the delete calls, and pins the number of
    estimate, query and delete invocations.
    """
    batchSize = purge_old_tweets._MAX_DELETE_BATCH_SIZE
    estimatedTotal = batchSize * 3

    estimateNumTweetsToDeleteMock.return_value = estimatedTotal

    # All queries draw from one shared iterator, so each call hands out at
    # most `limit` of the uids that are still left
    candidateUids = iter(xrange(estimatedTotal))

    def takeCandidates(limit, **kwargs):  # pylint: disable=W0613
      return tuple(itertools.islice(candidateUids, limit))

    queryCandidateRowsMock.side_effect = takeCandidates

    # Row counts reported by consecutive delete calls; the middle one falls
    # short of a full batch
    reportedDeletions = [batchSize, batchSize // 2, batchSize]
    deleteRowsMock.side_effect = iter(reportedDeletions)

    # Execute
    totalDeleted = purge_old_tweets.purgeOldTweets(thresholdDays=90)

    # Verify
    self.assertEqual(totalDeleted, sum(reportedDeletions))

    self.assertEqual(estimateNumTweetsToDeleteMock.call_count, 1)

    # NOTE(review): the fourth query presumably finds the candidate iterator
    # exhausted, which ends the purge loop - confirm against purgeOldTweets
    self.assertEqual(queryCandidateRowsMock.call_count, 4)
    self.assertEqual(deleteRowsMock.call_count, 3)
  def testPurgeOldTweetsStopAtEstimated(
      self,
      estimateNumTweetsToDeleteMock,
      queryCandidateRowsMock,
      deleteRowsMock):
    """purgeOldTweets stops once the estimated number of rows is deleted.

    The candidate source holds one uid more than the estimate. The test
    asserts that exactly the estimated number is deleted in two query/delete
    rounds and that the surplus uid is never pulled from the source.
    """
    estimatedTotal = purge_old_tweets._MAX_DELETE_BATCH_SIZE * 2

    estimateNumTweetsToDeleteMock.return_value = estimatedTotal

    # One more candidate than estimated; it must remain unconsumed
    candidateUids = iter(xrange(estimatedTotal + 1))

    def takeCandidates(limit, **kwargs):  # pylint: disable=W0613
      return tuple(itertools.islice(candidateUids, limit))

    def deleteEverything(uids, **kwargs):  # pylint: disable=W0613
      # Report every requested row as deleted
      return len(uids)

    queryCandidateRowsMock.side_effect = takeCandidates
    deleteRowsMock.side_effect = deleteEverything

    # Execute
    totalDeleted = purge_old_tweets.purgeOldTweets(thresholdDays=90)

    # Verify
    self.assertEqual(totalDeleted, estimatedTotal)

    self.assertEqual(estimateNumTweetsToDeleteMock.call_count, 1)

    self.assertEqual(queryCandidateRowsMock.call_count, 2)
    self.assertEqual(deleteRowsMock.call_count, 2)

    # Make sure it didn't try to retrieve candidates beyond estimated number
    leftoverUids = tuple(candidateUids)
    self.assertEqual(len(leftoverUids), 1)
  def testPurgeOldTweetsFewerCandidatesThanExpected(
      self,
      estimateNumTweetsToDeleteMock,
      queryCandidateRowsMock,
      deleteRowsMock):
    """purgeOldTweets copes with fewer candidates than were estimated.

    The estimate is two full batches, but the candidate source only holds
    half of that. The test asserts that the returned total equals the number
    of candidates actually available, reached with two queries and a single
    delete call.
    """
    estimatedTotal = purge_old_tweets._MAX_DELETE_BATCH_SIZE * 2
    availableTotal = estimatedTotal // 2

    estimateNumTweetsToDeleteMock.return_value = estimatedTotal

    # Only half of the estimated rows can actually be found
    candidateUids = iter(xrange(availableTotal))

    def takeCandidates(limit, **kwargs):  # pylint: disable=W0613
      return tuple(itertools.islice(candidateUids, limit))

    def deleteEverything(uids, **kwargs):  # pylint: disable=W0613
      # Report every requested row as deleted
      return len(uids)

    queryCandidateRowsMock.side_effect = takeCandidates
    deleteRowsMock.side_effect = deleteEverything

    # Execute
    totalDeleted = purge_old_tweets.purgeOldTweets(thresholdDays=90)

    # Verify
    self.assertEqual(totalDeleted, availableTotal)

    self.assertEqual(estimateNumTweetsToDeleteMock.call_count, 1)

    # NOTE(review): the second query presumably comes back empty and ends the
    # purge before another delete is attempted - confirm against purgeOldTweets
    self.assertEqual(queryCandidateRowsMock.call_count, 2)
    self.assertEqual(deleteRowsMock.call_count, 1)
    def testPurgeOldTweets(self):
        """Purge against a temporary database removes only the old tweets.

        Inserts two rows older than the 90-day threshold and three rows at or
        below it, runs purgeOldTweets, and asserts that the deleted count
        matches the old rows and that exactly the young rows remain.
        """
        gcThresholdDays = 90

        now = datetime.utcnow()

        def makeTweetRow(ageInDays):
            # Minimal tweet row whose created_at lies ageInDays before `now`
            return dict(uid=uuid.uuid1().hex,
                        created_at=now - timedelta(days=ageInDays),
                        retweet=False,
                        lang="en-us")

        # Rows past the threshold: expected to be purged
        oldRows = [makeTweetRow(gcThresholdDays + excess)
                   for excess in (1, 2)]

        # Rows within the threshold: expected to survive
        youngRows = [makeTweetRow(ageInDays)
                     for ageInDays in (0,
                                       gcThresholdDays - 1,
                                       gcThresholdDays - 2)]

        allRows = oldRows + youngRows

        # Patch collectorsdb config to use a temporary database
        with collectorsdb_test_utils.ManagedTempRepository("purgetweets"):
            engine = collectorsdb.engineFactory()

            insertResult = engine.execute(
                schema.twitterTweets.insert(),  # pylint: disable=E1120
                allRows)

            self.assertEqual(insertResult.rowcount, len(allRows))

            # Execute
            numDeleted = purge_old_tweets.purgeOldTweets(gcThresholdDays)

            # Verify
            self.assertEqual(numDeleted, len(oldRows))

            # Verify that only the old tweets got purged
            uidQuery = sql.select([schema.twitterTweets.c.uid])
            survivingRows = engine.execute(uidQuery).fetchall()

            self.assertEqual(len(survivingRows), len(youngRows))

            expectedUids = [row["uid"] for row in youngRows]
            survivingUids = [row.uid for row in survivingRows]  # pylint: disable=E1101
            self.assertItemsEqual(expectedUids, survivingUids)
    def testPurgeOldTweets(self):
        """Check that purgeOldTweets deletes rows past the threshold and keeps the rest.

        Runs against a temporary collectorsdb repository seeded with two old and three young tweet rows.
        """
        gcThresholdDays = 90

        now = datetime.utcnow()

        def rowsForAges(agesInDays):
            # One tweet row per requested age, each with a fresh uid
            return [
                dict(uid=uuid.uuid1().hex, created_at=now - timedelta(days=age), retweet=False, lang="en-us")
                for age in agesInDays
            ]

        oldRows = rowsForAges((gcThresholdDays + 1, gcThresholdDays + 2))
        youngRows = rowsForAges((0, gcThresholdDays - 1, gcThresholdDays - 2))

        allRows = oldRows + youngRows

        # Patch collectorsdb config to use a temporary database
        with collectorsdb_test_utils.ManagedTempRepository("purgetweets"):
            engine = collectorsdb.engineFactory()

            insertStatement = schema.twitterTweets.insert()  # pylint: disable=E1120
            numInserted = engine.execute(insertStatement, allRows).rowcount

            self.assertEqual(numInserted, len(allRows))

            # Execute
            numDeleted = purge_old_tweets.purgeOldTweets(gcThresholdDays)

            # Verify
            self.assertEqual(numDeleted, len(oldRows))

            # Verify that only the old tweets got purged
            selectUids = sql.select([schema.twitterTweets.c.uid])
            remainingRows = engine.execute(selectUids).fetchall()

            self.assertEqual(len(remainingRows), len(youngRows))

            youngUids = [row["uid"] for row in youngRows]
            remainingUids = [row.uid for row in remainingRows]  # pylint: disable=E1101
            self.assertItemsEqual(youngUids, remainingUids)
  def testPurgeOldTweetsWithoutOldRecords(self,
                                          estimateNumTweetsToDeleteMock,
                                          queryCandidateRowsMock,
                                          deleteRowsMock):
    """purgeOldTweets does nothing when the estimate says nothing is old.

    With an estimate of zero, the test asserts that zero rows are reported
    deleted and that neither the candidate query nor the delete is invoked.
    """
    estimateNumTweetsToDeleteMock.return_value = 0

    # These should not be called in this test; an empty side-effect sequence
    # leaves them with nothing to return if they are called anyway
    for mustNotBeCalledMock in (queryCandidateRowsMock, deleteRowsMock):
      mustNotBeCalledMock.side_effect = []

    # Execute
    totalDeleted = purge_old_tweets.purgeOldTweets(thresholdDays=90)

    # Verify
    self.assertEqual(totalDeleted, 0)

    self.assertEqual(estimateNumTweetsToDeleteMock.call_count, 1)

    self.assertEqual(queryCandidateRowsMock.call_count, 0)
    self.assertEqual(deleteRowsMock.call_count, 0)