    def test_incremental_dump(self):
        base = 1500000000
        # generate 5 listens with inserted_ts base+1 .. base+5
        listens = generate_data(1, self.testuser_name, base - 4, 5, base + 1)
        self._insert_with_created(listens)
        # generate 5 listens with inserted_ts base+6 .. base+10
        listens = generate_data(1, self.testuser_name, base + 1, 5, base + 6)
        self._insert_with_created(listens)
        temp_dir = tempfile.mkdtemp()
        dump_location = self.logstore.dump_listens(
            location=temp_dir,
            dump_id=1,
            start_time=datetime.utcfromtimestamp(base + 6),
            end_time=datetime.utcfromtimestamp(base + 10))
        self.assertTrue(os.path.isfile(dump_location))
        self.reset_timescale_db()
        self.logstore.import_listens_dump(dump_location)
        listens, min_ts, max_ts = self.logstore.fetch_listens(
            user_name=self.testuser_name, to_ts=base + 11)
        self.assertEqual(len(listens), 4)
        self.assertEqual(listens[0].ts_since_epoch, base + 5)
        self.assertEqual(listens[1].ts_since_epoch, base + 4)
        self.assertEqual(listens[2].ts_since_epoch, base + 3)
        self.assertEqual(listens[3].ts_since_epoch, base + 2)

        shutil.rmtree(temp_dir)
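
For reference, every example here drives the same generate_data test helper. Below is a minimal, self-contained sketch compatible with the calls above; the signature, the Listen fields, and the timestamp-offset convention are all inferred from usage (some later examples use an older signature without user_name), so treat it as an assumption rather than the real ListenBrainz helper.

from collections import namedtuple

# Hypothetical stand-in for the real Listen model, which carries far more
# metadata (track name, artist, recording MSID, ...).
Listen = namedtuple('Listen', ['user_id', 'user_name', 'ts_since_epoch', 'inserted_ts'])

def generate_data(user_id, user_name, from_ts, num_records, inserted_ts=None):
    """Build num_records listens with consecutive timestamps starting at
    from_ts; when inserted_ts is given, seed consecutive insert timestamps
    the same way. The exact offsets in the real helper may differ."""
    listens = []
    for i in range(num_records):
        listens.append(Listen(
            user_id=user_id,
            user_name=user_name,
            ts_since_epoch=from_ts + i,
            inserted_ts=(inserted_ts + i) if inserted_ts is not None else None,
        ))
    return listens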
    def test_incremental_dump(self):
        """ Dump and import listens
        """
        listens = generate_data(1, self.testuser_name, 1, 5)  # generate 5 listens with ts 1-5
        self.logstore.insert(listens)
        sleep(1)
        start_time = datetime.now()
        sleep(1)
        listens = generate_data(1, self.testuser_name, 6, 5)  # generate 5 listens with ts 6-10
        self.logstore.insert(listens)
        sleep(1)
        temp_dir = tempfile.mkdtemp()
        dump_location = self.logstore.dump_listens(
            location=temp_dir,
            dump_id=1,
            start_time=start_time,
            end_time=datetime.now(),
        )
        sleep(1)
        self.assertTrue(os.path.isfile(dump_location))
        self.reset_influx_db()
        sleep(1)
        self.logstore.import_listens_dump(dump_location)
        sleep(1)
        listens = self.logstore.fetch_listens(user_name=self.testuser_name,
                                              to_ts=11)
        self.assertEqual(len(listens), 5)
        self.assertEqual(listens[0].ts_since_epoch, 10)
        self.assertEqual(listens[1].ts_since_epoch, 9)
        self.assertEqual(listens[2].ts_since_epoch, 8)
        self.assertEqual(listens[3].ts_since_epoch, 7)
        self.assertEqual(listens[4].ts_since_epoch, 6)
Example #3
    def test_time_range_full_dumps(self):
        base = 1500000000
        listens = generate_data(1, self.testuser_name, base + 1,
                                5)  # generate 5 listens with ts base+1 .. base+5
        self.logstore.insert(listens)
        listens = generate_data(1, self.testuser_name, base + 6,
                                5)  # generate 5 listens with ts base+6 .. base+10
        self.logstore.insert(listens)
        self.logstore.insert(listens)
        temp_dir = tempfile.mkdtemp()
        dump_location = self.dumpstore.dump_listens(
            location=temp_dir,
            dump_id=1,
            end_time=datetime.utcfromtimestamp(base + 5))
        self.assertTrue(os.path.isfile(dump_location))

        self.reset_timescale_db()
        self.logstore.import_listens_dump(dump_location)
        recalculate_all_user_data()

        listens, min_ts, max_ts = self.logstore.fetch_listens(
            user=self.testuser, to_ts=base + 11)
        self.assertEqual(len(listens), 5)
        self.assertEqual(listens[0].ts_since_epoch, base + 5)
        self.assertEqual(listens[1].ts_since_epoch, base + 4)
        self.assertEqual(listens[2].ts_since_epoch, base + 3)
        self.assertEqual(listens[3].ts_since_epoch, base + 2)
        self.assertEqual(listens[4].ts_since_epoch, base + 1)
    def _create_test_data(self):
        self.log.info("Inserting test data...")
        self.listen = generate_data(self.testuser_id, 'test', MIN_ID + 1, 1)[0]
        listen = self.listen.to_json()
        # note: the (name, value, time) argument order matches the legacy
        # redis-py Redis class; redis-py 3.0+ expects setex(name, time, value)
        self._redis.redis.setex('playing_now' + ':' + str(listen['user_id']),
                                ujson.dumps(listen).encode('utf-8'), self.config.PLAYING_NOW_MAX_DURATION)
        self.log.info("Test data inserted")
    def test_create_full_db(self, mock_notify):

        listens = generate_data(1, self.user_name, 1500000000, 5)
        self.listenstore.insert(listens)
        sleep(1)

        # create a full dump
        self.runner.invoke(dump_manager.create_full, ['--location', self.tempdir])
        self.assertEqual(len(os.listdir(self.tempdir)), 1)
        dump_name = os.listdir(self.tempdir)[0]
        mock_notify.assert_called_with(dump_name, 'fullexport')

        # make sure that the dump contains a full listens dump, a public dump,
        # a private dump and a spark dump.
        archive_count = 0
        for file_name in os.listdir(os.path.join(self.tempdir, dump_name)):
            if file_name.endswith('.tar.xz'):
                archive_count += 1
        self.assertEqual(archive_count, 4)

        # now, remove the old dump and create a new one with the same id
        shutil.rmtree(os.path.join(self.tempdir, dump_name))
        self.runner.invoke(dump_manager.create_full, ['--location', self.tempdir, '--last-dump-id'])
        self.assertEqual(len(os.listdir(self.tempdir)), 1)
        recreated_dump_name = os.listdir(self.tempdir)[0]

        # dump names should be the exact same
        self.assertEqual(dump_name, recreated_dump_name)

        # dump should contain the 4 archives
        archive_count = 0
        for file_name in os.listdir(os.path.join(self.tempdir, dump_name)):
            if file_name.endswith('.tar.xz'):
                archive_count += 1
        self.assertEqual(archive_count, 4)
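
The loop that counts .tar.xz files recurs in almost every dump test in this listing. A hypothetical helper (not part of dump_manager; the name is illustrative) would express the same check in one line:

import os

def count_archives(dump_dir, suffix='.tar.xz'):
    """Count dump archives in a directory; equivalent to the inline
    counting loops in the tests above."""
    return sum(1 for name in os.listdir(dump_dir) if name.endswith(suffix))

# usage inside a test:
#     self.assertEqual(count_archives(os.path.join(self.tempdir, dump_name)), 4)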
Example #6
    def test_create_incremental_dump_with_id(self):

        # if the dump ID does not exist, it should exit with a -1
        result = self.runner.invoke(
            dump_manager.create_incremental,
            ['--location', self.tempdir, '--dump-id', 1000])
        self.assertEqual(result.exit_code, -1)

        # create a base dump entry
        t = int(time.time())
        db_dump.add_dump_entry(t)
        sleep(1)
        self.listenstore.insert(generate_data(1, self.user_name, 1500000000, 5))
        sleep(1)

        # create a new dump ID to recreate later
        dump_id = db_dump.add_dump_entry(int(time.time()))
        # now, create a dump with that specific dump id
        result = self.runner.invoke(
            dump_manager.create_incremental,
            ['--location', self.tempdir, '--dump-id', dump_id])
        self.assertEqual(len(os.listdir(self.tempdir)), 1)
        dump_name = os.listdir(self.tempdir)[0]
        created_dump_id = int(dump_name.split('-')[2])
        self.assertEqual(dump_id, created_dump_id)

        # dump should contain the listens and spark archives
        archive_count = 0
        for file_name in os.listdir(os.path.join(self.tempdir, dump_name)):
            if file_name.endswith('.tar.xz'):
                archive_count += 1
        self.assertEqual(archive_count, 2)
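
Several tests recover the dump ID with dump_name.split('-')[2]. A small named helper makes that intent explicit; the dump-name layout shown in the docstring is an assumption inferred from that indexing:

def parse_dump_id(dump_name):
    """Extract the dump ID from a dump directory name of the (assumed)
    form 'listenbrainz-dump-<id>-<timestamp>-...'."""
    return int(dump_name.split('-')[2])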
Example #7
    def test_create_incremental(self, mock_notify):
        # create an incremental dump; this won't work because there is
        # no previous dump to measure the increment from
        result = self.runner.invoke(dump_manager.create_incremental,
                                    ['--location', self.tempdir])
        self.assertEqual(result.exit_code, -1)
        self.assertEqual(len(os.listdir(self.tempdir)), 0)

        base = int(time.time())
        dump_id = db_dump.add_dump_entry(base - 60)
        print("%d dump id" % dump_id)
        sleep(1)
        self.listenstore.insert(generate_data(1, self.user_name, base - 30, 5))
        result = self.runner.invoke(dump_manager.create_incremental,
                                    ['--location', self.tempdir])
        self.assertEqual(len(os.listdir(self.tempdir)), 1)
        dump_name = os.listdir(self.tempdir)[0]
        mock_notify.assert_called_with(dump_name, 'incremental')

        # created dump ID should be one greater than previous dump's ID
        created_dump_id = int(dump_name.split('-')[2])
        print("%d created dump id" % created_dump_id)
        self.assertEqual(created_dump_id, dump_id + 1)

        # make sure that the dump contains a listens dump and a spark dump
        archive_count = 0
        for file_name in os.listdir(os.path.join(self.tempdir, dump_name)):
            if file_name.endswith('.tar.xz'):
                archive_count += 1
        self.assertEqual(archive_count, 2)
    def _create_test_data(self):
        self.log.info("Inserting test data...")
        self.listen = generate_data(self.testuser_id, MIN_ID + 1, 1)[0]
        listen = self.listen.to_json()
        self._redis.redis.setex('playing_now' + ':' + str(listen['user_id']),
                                ujson.dumps(listen).encode('utf-8'), self.config.PLAYING_NOW_MAX_DURATION)
        self.log.info("Test data inserted")
Example #9
    def test_create_full_dump_with_id(self):

        self.listenstore.insert(generate_data(1, self.user_name, 1500000000, 5))
        # if the dump ID does not exist, it should exit with a -1
        result = self.runner.invoke(
            dump_manager.create_full,
            ['--location', self.tempdir, '--dump-id', 1000])
        self.assertEqual(result.exit_code, -1)
        # make sure no directory was created either
        self.assertEqual(len(os.listdir(self.tempdir)), 0)

        # now, add a dump entry to the database and create a dump with that specific dump id
        dump_id = db_dump.add_dump_entry(int(time.time()))
        result = self.runner.invoke(
            dump_manager.create_full,
            ['--location', self.tempdir, '--dump-id', dump_id])
        self.assertEqual(len(os.listdir(self.tempdir)), 1)
        dump_name = os.listdir(self.tempdir)[0]
        created_dump_id = int(dump_name.split('-')[2])
        self.assertEqual(dump_id, created_dump_id)

        # dump should contain the 4 archives
        archive_count = 0
        for file_name in os.listdir(os.path.join(self.tempdir, dump_name)):
            if file_name.endswith('.tar.xz'):
                archive_count += 1
        self.assertEqual(archive_count, 4)
Example #10
    def test_listen_counts_in_cache(self):
        count = self._create_test_data(self.testuser_name)
        self.assertEqual(count, self.logstore.get_listen_count_for_user(self.testuser_name, need_exact=True))
        user_key = '{}{}'.format(self.ns + REDIS_TIMESCALE_USER_LISTEN_COUNT, self.testuser_name)
        self.assertEqual(count, int(cache.get(user_key, decode=False)))

        batch = generate_data(self.testuser_id, self.testuser_name, int(time()), 1)
        self.logstore.insert(batch)
        self.assertEqual(count + 1, int(cache.get(user_key, decode=False)))
    def test_listen_counts_in_cache(self):
        count = self._create_test_data(self.testuser_name)
        self.assertEqual(count, self.logstore.get_listen_count_for_user(self.testuser_name, need_exact=True))
        user_key = '{}{}'.format(REDIS_INFLUX_USER_LISTEN_COUNT, self.testuser_name)
        self.assertEqual(count, int(cache.get(user_key, decode=False)))

        batch = generate_data(self.testuser_id, self.testuser_name, int(time.time()), 1)
        self.logstore.insert(batch)
        self.assertEqual(count + 1, int(cache.get(user_key, decode=False)))
Example #12
    def test_time_range_full_dumps(self):
        listens = generate_data(1, self.testuser_name, 1, 5)  # generate 5 listens with ts 1-5
        self.logstore.insert(listens)
        sleep(1)
        between_time = datetime.now()
        sleep(1)
        listens = generate_data(1, self.testuser_name, 6, 5)  # generate 5 listens with ts 6-10
        self.logstore.insert(listens)
        sleep(1)
        temp_dir = tempfile.mkdtemp()
        dump_location = self.logstore.dump_listens(
            location=temp_dir,
            dump_id=1,
            end_time=between_time,
        )
        spark_dump_location = self.logstore.dump_listens(
            location=temp_dir,
            dump_id=1,
            end_time=between_time,
            spark_format=True,
        )

        sleep(1)
        self.assertTrue(os.path.isfile(dump_location))
        self.reset_influx_db()
        sleep(1)
        self.logstore.import_listens_dump(dump_location)
        sleep(1)
        listens = self.logstore.fetch_listens(user_name=self.testuser_name,
                                              to_ts=11)
        self.assertEqual(len(listens), 5)
        self.assertEqual(listens[0].ts_since_epoch, 5)
        self.assertEqual(listens[1].ts_since_epoch, 4)
        self.assertEqual(listens[2].ts_since_epoch, 3)
        self.assertEqual(listens[3].ts_since_epoch, 2)
        self.assertEqual(listens[4].ts_since_epoch, 1)

        self.assert_spark_dump_contains_listens(spark_dump_location, 5)
        shutil.rmtree(temp_dir)
    def test_listen_counts_in_cache(self):
        uid = random.randint(2000, 1 << 31)
        testuser = db_user.get_or_create(uid, "user_%d" % uid)
        testuser_name = testuser['musicbrainz_id']
        count = self._create_test_data(testuser_name)
        user_key = REDIS_USER_LISTEN_COUNT + testuser_name
        self.assertEqual(
            count, self.logstore.get_listen_count_for_user(testuser_name))
        self.assertEqual(count, int(cache.get(user_key, decode=False) or 0))

        batch = generate_data(uid, testuser_name, int(time()), 1)
        self.logstore.insert(batch)
        self.assertEqual(count + 1,
                         int(cache.get(user_key, decode=False) or 0))
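
The three test_listen_counts_in_cache variants above differ only in which key prefix their listenstore uses; the invariant is the same. A compact sketch of that invariant, with a plain dict standing in for the Redis-backed cache (names are illustrative, not the brainzutils API):

# in-memory stand-in for the per-user listen-count cache
_listen_counts = {}

def record_insert(prefix, user_name, num_listens):
    """Increment the cached per-user listen count on insert, as the
    assertions above expect after logstore.insert()."""
    key = prefix + user_name
    _listen_counts[key] = _listen_counts.get(key, 0) + num_listens
    return _listen_counts[key]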
Example #14
    def test_incremental_dumps_listen_with_no_insert_timestamp(self):
        """ Incremental dumps should only consider listens that have
        inserted_timestamps.
        """
        t = datetime.now()
        sleep(1)
        listens = generate_data(1, self.testuser_name, 1, 5)

        # write these listens to influx, stripping the inserted_timestamp
        # from all but the first listen
        influx_rows = [
            listen.to_influx(quote(self.testuser_name)) for listen in listens
        ]
        for row in influx_rows[1:]:
            row['fields'].pop('inserted_timestamp')

        self.logstore.write_points_to_db(influx_rows)
        sleep(1)
        listens_from_influx = self.logstore.fetch_listens(
            user_name=self.testuser_name, to_ts=11)
        self.assertEqual(len(listens_from_influx), 5)

        # incremental dump (with a start time) should not contain these listens
        temp_dir = tempfile.mkdtemp()
        dump_location = self.logstore.dump_listens(
            location=temp_dir,
            dump_id=1,
            start_time=t,
            end_time=datetime.now(),
        )
        spark_dump_location = self.logstore.dump_listens(
            location=temp_dir,
            dump_id=1,
            start_time=t,
            end_time=datetime.now(),
            spark_format=True,
        )
        self.assertTrue(os.path.isfile(dump_location))
        self.reset_influx_db()
        sleep(1)
        self.logstore.import_listens_dump(dump_location)
        sleep(1)
        listens_from_influx = self.logstore.fetch_listens(
            user_name=self.testuser_name, to_ts=11)
        self.assertEqual(len(listens_from_influx), 1)
        self.assert_spark_dump_contains_listens(spark_dump_location, 1)
        shutil.rmtree(temp_dir)
Example #15
    def test_full_dump_listen_with_no_insert_timestamp(self):
        """ We have listens with no `inserted_timestamps` inside the production
        database. This means that full dumps should always be able to dump these
        listens as well. This is a test to test that.
        """
        listens = generate_data(1, self.testuser_name, 1, 5)

        # write these listens to influx, stripping the inserted_timestamp
        # from all but the first listen
        influx_rows = [
            listen.to_influx(quote(self.testuser_name)) for listen in listens
        ]
        for row in influx_rows[1:]:
            row['fields'].pop('inserted_timestamp')

        t = datetime.now()
        self.logstore.write_points_to_db(influx_rows)
        sleep(1)
        listens_from_influx = self.logstore.fetch_listens(
            user_name=self.testuser_name, to_ts=11)
        self.assertEqual(len(listens_from_influx), 5)

        # full dump (with no start time) should contain these listens
        temp_dir = tempfile.mkdtemp()
        dump_location = self.logstore.dump_listens(
            location=temp_dir,
            dump_id=1,
            end_time=datetime.now(),
        )
        spark_dump_location = self.logstore.dump_listens(
            location=temp_dir,
            dump_id=1,
            end_time=datetime.now(),
            spark_format=True,
        )
        self.assertTrue(os.path.isfile(dump_location))
        self.reset_influx_db()
        sleep(1)
        self.logstore.import_listens_dump(dump_location)
        sleep(1)
        listens_from_influx = self.logstore.fetch_listens(
            user_name=self.testuser_name, to_ts=11)
        self.assertEqual(len(listens_from_influx), 5)
        self.assert_spark_dump_contains_listens(spark_dump_location, 5)
        shutil.rmtree(temp_dir)
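
Both of the inserted_timestamp tests call an assert_spark_dump_contains_listens helper that is not shown in this listing. A plausible sketch, assuming the spark dump is a .tar.xz archive of JSON-lines listen files (the archive layout and the .json suffix are assumptions):

import tarfile

def assert_spark_dump_contains_listens(self, spark_dump_path, expected_count):
    """Count listen records (one JSON object per line) across the files in
    the spark dump archive and compare against expected_count."""
    found = 0
    with tarfile.open(spark_dump_path, 'r:xz') as tar:
        for member in tar:
            if member.isfile() and member.name.endswith('.json'):
                f = tar.extractfile(member)
                found += sum(1 for line in f if line.strip())
    self.assertEqual(found, expected_count)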
Example #16
    def test_create_full_db(self, mock_notify):

        listens = generate_data(1, self.user_name, 1500000000, 5)
        self.listenstore.insert(listens)
        sleep(1)

        # create a full dump
        self.runner.invoke(dump_manager.create_full,
                           ['--location', self.tempdir])
        self.assertEqual(len(os.listdir(self.tempdir)), 1)
        dump_name = os.listdir(self.tempdir)[0]
        mock_notify.assert_called_with(dump_name, 'fullexport')

        # make sure that the dump contains a full listens dump, a public dump,
        # a private dump and a spark dump.
        archive_count = 0
        for file_name in os.listdir(os.path.join(self.tempdir, dump_name)):
            if file_name.endswith('.tar.xz'):
                archive_count += 1
        self.assertEqual(archive_count, 4)
    def _create_test_data(self, from_ts=MIN_ID + 1, num_listens=None):
        # pick the random default per call: default arguments are evaluated
        # once at definition time, so random.randint() in the signature
        # would produce the same "random" count for every call
        if num_listens is None:
            num_listens = random.randint(1, 100)
        self.log.info("Inserting test data...")
        test_data = generate_data(self.testuser_id, from_ts, num_listens)
        self.logstore.insert(test_data)
        self.log.info("Test data inserted")
        return from_ts, num_listens