def test_dump_postgres_db_table_entries(self):
    db_user.create('test_user')
    timestamp = datetime.today()
    location = db_dump.dump_postgres_db(self.tempdir, dump_time=timestamp)
    dump_entries = db_dump.get_dump_entries()
    self.assertEqual(len(dump_entries), 1)
    self.assertEqual(dump_entries[0]['created'].strftime('%s'), timestamp.strftime('%s'))

def test_dump_postgres_db_table_entries(self):
    with self.app.app_context():
        db_user.create(1, 'test_user')
        timestamp = datetime.today()
        location = db_dump.dump_postgres_db(self.tempdir, dump_time=timestamp)
        dump_entries = db_dump.get_dump_entries()
        self.assertEqual(len(dump_entries), 1)
        self.assertEqual(dump_entries[0]['created'].strftime('%s'), timestamp.strftime('%s'))
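
Both variants above assume a database test harness that provides `self.app` and `self.tempdir`. A minimal sketch of such a fixture, with hypothetical details where the source does not show them:

import tempfile
import unittest

import listenbrainz.db.dump as db_dump
import listenbrainz.db.user as db_user
from listenbrainz.webserver import create_app


class DatabaseTestCase(unittest.TestCase):
    # Sketch only: the real ListenBrainz DatabaseTestCase also resets the
    # database between tests; the import paths follow the project layout
    # and may differ.

    def setUp(self):
        self.app = create_app()            # Flask app whose context the second variant pushes
        self.tempdir = tempfile.mkdtemp()  # scratch directory the dump is written into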
def create_full(location, threads, dump_id, last_dump_id):
    """ Create a ListenBrainz data dump which includes a private dump, a statistics dump
        and a dump of the actual listens from InfluxDB

        Args:
            location (str): path to the directory where the dump should be made
            threads (int): the number of threads to use during compression
            dump_id (int): the ID of the ListenBrainz data dump
            last_dump_id (bool): flag indicating whether to create a full dump from the last entry in the dump table
    """
    app = create_app()
    with app.app_context():
        from listenbrainz.webserver.influx_connection import _influx as ls
        if last_dump_id:
            all_dumps = db_dump.get_dump_entries()
            if len(all_dumps) == 0:
                current_app.logger.error(
                    "Cannot create full dump with last dump's ID, no dump exists!"
                )
                sys.exit(-1)
            dump_id = all_dumps[0]['id']

        if dump_id is None:
            end_time = datetime.now()
            dump_id = db_dump.add_dump_entry(int(end_time.strftime('%s')))
        else:
            dump_entry = db_dump.get_dump_entry(dump_id)
            if dump_entry is None:
                current_app.logger.error("No dump with ID %d found", dump_id)
                sys.exit(-1)
            end_time = dump_entry['created']

        dump_path = os.path.join(
            location, 'listenbrainz-dump-{dump_id}-{time}-full'.format(
                dump_id=dump_id, time=end_time.strftime('%Y%m%d-%H%M%S')))
        create_path(dump_path)
        db_dump.dump_postgres_db(dump_path, end_time, threads)
        ls.dump_listens(dump_path,
                        dump_id=dump_id,
                        end_time=end_time,
                        threads=threads,
                        spark_format=False)
        ls.dump_listens(dump_path,
                        dump_id=dump_id,
                        end_time=end_time,
                        threads=threads,
                        spark_format=True)
        try:
            write_hashes(dump_path)
        except IOError as e:
            current_app.logger.error('Unable to create hash files! Error: %s',
                                     str(e),
                                     exc_info=True)
            return
        current_app.logger.info('Dumps created and hashes written at %s' %
                                dump_path)
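
create_full is typically driven from a command-line entry point rather than called directly. A hypothetical click wiring (the option names here are illustrative assumptions, not the project's actual command definition):

import click

@click.command(name="create_full")
@click.option("--location", "-l", default="/tmp", help="directory to write the dump to")
@click.option("--threads", "-t", type=int, default=4, help="threads to use during compression")
@click.option("--dump-id", type=int, default=None, help="reuse an existing dump entry")
@click.option("--last-dump-id", is_flag=True, help="base the dump on the newest dump entry")
def full(location, threads, dump_id, last_dump_id):
    create_full(location, threads, dump_id, last_dump_id)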
Example #4
def test_copy_table(self):
    db_dump.add_dump_entry(datetime.today().strftime('%s'))
    with db.engine.connect() as connection:
        db_dump.copy_table(
            cursor=connection.connection.cursor(),
            location=self.tempdir,
            columns='id, created',
            table_name='data_dump',
        )
    dumps = db_dump.get_dump_entries()
    with open(os.path.join(self.tempdir, 'data_dump'), 'r') as f:
        file_contents = [line for line in f]
    self.assertEqual(len(dumps), len(file_contents))
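
copy_table itself is not shown in these examples. A sketch of what it might look like, assuming it wraps psycopg2's cursor.copy_to with a parenthesized SELECT (PostgreSQL's COPY (query) TO form); the real implementation may differ:

import os

def copy_table(cursor, location, columns, table_name):
    # Write the selected columns of `table_name` as tab-separated rows
    # into a file named after the table inside `location`.
    with open(os.path.join(location, table_name), 'w') as f:
        cursor.copy_to(f, '(SELECT {columns} FROM {table})'.format(
            columns=columns, table=table_name))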
Example #6
def get_dump_info():
    """
    Get information about ListenBrainz data dumps.
    You need to pass the `id` parameter in a GET request to get data about that particular
    dump.

    **Example response**:

    .. code-block:: json

        {
            "id": 1,
            "timestamp": "20190625-165900"
        }

    :query id: Integer specifying the ID of the dump; if not provided, the endpoint returns information about the latest data dump.
    :statuscode 200: You have data.
    :statuscode 400: You did not provide a valid dump ID. See error message for details.
    :statuscode 404: Dump with given ID does not exist.
    :resheader Content-Type: *application/json*
    """

    dump_id = request.args.get("id")
    if dump_id is None:
        try:
            dump = db_dump.get_dump_entries()[0]  # return the latest dump
        except IndexError:
            raise APINotFound("No dump entry exists.")
    else:
        try:
            dump_id = int(dump_id)
        except ValueError:
            raise APIBadRequest("The `id` parameter needs to be an integer.")
        dump = db_dump.get_dump_entry(dump_id)
        if dump is None:
            raise APINotFound("No dump exists with ID: %d" % dump_id)

    return jsonify({
        "id": dump["id"],
        "timestamp": _convert_timestamp_to_string_dump_format(dump["created"]),
    })
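
On the client side this view can be exercised with a plain GET request; the URL below assumes the public ListenBrainz API route for this endpoint:

import requests

resp = requests.get('https://api.listenbrainz.org/1/status/get-dump-info',
                    params={'id': 1})
resp.raise_for_status()
print(resp.json())  # e.g. {"id": 1, "timestamp": "20190625-165900"}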
Example #7
def create_full(location, threads, dump_id, last_dump_id):
    """ Create a ListenBrainz data dump which includes a private dump, a statistics dump
        and a dump of the actual listens from the listenstore

        Args:
            location (str): path to the directory where the dump should be made
            threads (int): the number of threads to use during compression
            dump_id (int): the ID of the ListenBrainz data dump
            last_dump_id (bool): flag indicating whether to create a full dump from the last entry in the dump table
    """
    app = create_app()
    with app.app_context():
        from listenbrainz.webserver.timescale_connection import _ts as ls
        if last_dump_id:
            all_dumps = db_dump.get_dump_entries()
            if len(all_dumps) == 0:
                current_app.logger.error(
                    "Cannot create full dump with last dump's ID, no dump exists!"
                )
                sys.exit(-1)
            dump_id = all_dumps[0]['id']

        if dump_id is None:
            end_time = datetime.now()
            dump_id = db_dump.add_dump_entry(int(end_time.strftime('%s')))
        else:
            dump_entry = db_dump.get_dump_entry(dump_id)
            if dump_entry is None:
                current_app.logger.error("No dump with ID %d found", dump_id)
                sys.exit(-1)
            end_time = dump_entry['created']

        ts = end_time.strftime('%Y%m%d-%H%M%S')
        dump_name = 'listenbrainz-dump-{dump_id}-{time}-full'.format(
            dump_id=dump_id, time=ts)
        dump_path = os.path.join(location, dump_name)
        create_path(dump_path)
        db_dump.dump_postgres_db(dump_path, end_time, threads)

        listens_dump_file = ls.dump_listens(dump_path,
                                            dump_id=dump_id,
                                            end_time=end_time,
                                            threads=threads)
        spark_dump_file = 'listenbrainz-listens-dump-{dump_id}-{time}-spark-full.tar.xz'.format(
            dump_id=dump_id, time=ts)
        spark_dump_path = os.path.join(dump_path, spark_dump_file)
        transmogrify_dump_file_to_spark_import_format(listens_dump_file,
                                                      spark_dump_path, threads)

        try:
            write_hashes(dump_path)
        except IOError as e:
            current_app.logger.error('Unable to create hash files! Error: %s',
                                     str(e),
                                     exc_info=True)
            sys.exit(-1)

        try:
            if not sanity_check_dumps(dump_path, 12):
                sys.exit(-1)
        except OSError as e:
            current_app.logger.error('Failed to sanity check dumps: %s',
                                     str(e),
                                     exc_info=True)
            sys.exit(-1)

        # if in production, send an email to interested people for observability
        send_dump_creation_notification(dump_name, 'fullexport')

        current_app.logger.info('Dumps created and hashes written at %s' %
                                dump_path)

        # Write the DUMP_ID file so that the FTP sync scripts can be more robust
        with open(os.path.join(dump_path, "DUMP_ID.txt"), "w") as f:
            f.write("%s %s full\n" % (ts, dump_id))

        sys.exit(0)
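
write_hashes is referenced but not shown here. A sketch under the assumption that it writes an .md5 and a .sha256 checksum file next to every file in the dump directory; the real helper may differ:

import hashlib
import os

def write_hashes(location):
    for entry in os.listdir(location):
        path = os.path.join(location, entry)
        if not os.path.isfile(path):
            continue
        # One checksum file per algorithm, e.g. dump.tar.xz.md5
        for suffix, hasher in (('md5', hashlib.md5()), ('sha256', hashlib.sha256())):
            with open(path, 'rb') as f:
                for chunk in iter(lambda: f.read(1 << 20), b''):
                    hasher.update(chunk)
            with open('%s.%s' % (path, suffix), 'w') as out:
                out.write('%s  %s\n' % (hasher.hexdigest(), entry))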
Example #8
def test_add_dump_entry(self):
    prev_dumps = db_dump.get_dump_entries()
    db_dump.add_dump_entry(datetime.today().strftime('%s'))
    now_dumps = db_dump.get_dump_entries()
    self.assertEqual(len(now_dumps), len(prev_dumps) + 1)

def test_add_dump_entry(self):
    prev_dumps = db_dump.get_dump_entries()
    db_dump.add_dump_entry()
    now_dumps = db_dump.get_dump_entries()
    self.assertEqual(len(now_dumps), len(prev_dumps) + 1)
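
The two variants reflect a signature change: the older add_dump_entry took a Unix timestamp, while the newer one defaults to the current time. A sketch of the older form, assuming db.engine is the listenbrainz.db engine used in the tests above; the SQL is illustrative, not the project's actual query:

import sqlalchemy
from listenbrainz import db

def add_dump_entry(timestamp):
    # Insert a row into the data_dump table and return the new row's id.
    with db.engine.connect() as connection:
        result = connection.execute(sqlalchemy.text("""
            INSERT INTO data_dump (created)
                 VALUES (to_timestamp(:ts))
              RETURNING id
        """), {'ts': timestamp})
        return result.fetchone()[0]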