示例#1
0
def delete_user(bigquery_connection, musicbrainz_id):
    """ Deletes all listens for user with specified MusicBrainz ID from Google BigQuery.

    Args:
        bigquery: the bigquery connection object
        musicbrainz_id (str): the MusicBrainz ID of the user
    """
    query = """DELETE FROM {dataset}.{table}
                     WHERE user_name = @user_name
            """.format(dataset=config.BIGQUERY_DATASET_ID, table=config.BIGQUERY_TABLE_ID)

    parameters = [{
        'name': 'user_name',
        'type': 'STRING',
        'value': musicbrainz_id,
    }]

    bigquery.run_query(bigquery_connection, query, parameters, dml=True)
示例#2
0
def get_top_releases(bigquery_connection, musicbrainz_id, time_interval=None):
    """ Get top releases for user with given MusicBrainz ID over a particular period of time

        Args: bigquery_connection: the bigquery connection object
              musicbrainz_id (str): the MusicBrainz ID of the user
              time_interval  (str): the time interval over which top releases should be returned
                                    (defaults to all time)

        Returns: A sorted list of dicts with the following structure
                [
                    {
                        'artist_name' (str),
                        'artist_msid' (uuid),
                        'artist_mbids' (string of comma seperated uuids),
                        'release_name' (str),
                        'release_msid' (uuid),
                        'release_mbid' (uuid),
                        'listen_count' (int)
                    }
                ]
    """

    filter_clause = ""
    if time_interval:
        filter_clause = "AND listened_at >= TIMESTAMP_SUB(CURRENT_TIME(), INTERVAL {})".format(time_interval)


    query = """SELECT artist_name
                    , artist_msid
                    , artist_mbids
                    , release_name
                    , release_msid
                    , release_mbid
                    , COUNT(release_msid) as listen_count
                 FROM {dataset_id}.{table_id}
                WHERE user_name = @musicbrainz_id
                {time_filter_clause}
             GROUP BY artist_msid, artist_mbids, artist_name, release_name, release_msid, release_mbid
             ORDER BY listen_count DESC
                LIMIT {limit}
            """.format(
                    dataset_id=config.BIGQUERY_DATASET_ID,
                    table_id=config.BIGQUERY_TABLE_ID,
                    time_filter_clause=filter_clause,
                    limit=config.STATS_ENTITY_LIMIT,
                )

    parameters = [
        {
            'type': 'STRING',
            'name': 'musicbrainz_id',
            'value': musicbrainz_id
        }
    ]

    return bigquery.run_query(bigquery_connection, query, parameters)
示例#3
0
def delete_user(bigquery_connection, musicbrainz_id):
    """ Deletes all listens for user with specified MusicBrainz ID from Google BigQuery.

    Args:
        bigquery: the bigquery connection object
        musicbrainz_id (str): the MusicBrainz ID of the user
    """
    query = """DELETE FROM {dataset}.{table}
                     WHERE user_name = @user_name
            """.format(dataset=config.BIGQUERY_DATASET_ID,
                       table=config.BIGQUERY_TABLE_ID)

    parameters = [{
        'name': 'user_name',
        'type': 'STRING',
        'value': musicbrainz_id,
    }]

    bigquery.run_query(bigquery_connection, query, parameters, dml=True)
示例#4
0
def get_top_releases(bigquery_connection, musicbrainz_id, time_interval=None):
    """ Get top releases for user with given MusicBrainz ID over a particular period of time

        Args: bigquery_connection: the bigquery connection object
              musicbrainz_id (str): the MusicBrainz ID of the user
              time_interval  (str): the time interval over which top releases should be returned
                                    (defaults to all time)

        Returns: A sorted list of dicts with the following structure
                [
                    {
                        'artist_name' (str),
                        'artist_msid' (uuid),
                        'artist_mbids' (string of comma seperated uuids),
                        'release_name' (str),
                        'release_msid' (uuid),
                        'release_mbid' (uuid),
                        'listen_count' (int)
                    }
                ]
    """

    filter_clause = ""
    if time_interval:
        filter_clause = "AND listened_at >= TIMESTAMP_SUB(CURRENT_TIME(), INTERVAL {})".format(
            time_interval)

    query = """SELECT artist_name
                    , artist_msid
                    , artist_mbids
                    , release_name
                    , release_msid
                    , release_mbid
                    , COUNT(release_msid) as listen_count
                 FROM {dataset_id}.{table_id}
                WHERE user_name = @musicbrainz_id
                {time_filter_clause}
             GROUP BY artist_msid, artist_mbids, artist_name, release_name, release_msid, release_mbid
             ORDER BY listen_count DESC
                LIMIT {limit}
            """.format(
        dataset_id=config.BIGQUERY_DATASET_ID,
        table_id=config.BIGQUERY_TABLE_ID,
        time_filter_clause=filter_clause,
        limit=config.STATS_ENTITY_LIMIT,
    )

    parameters = [{
        'type': 'STRING',
        'name': 'musicbrainz_id',
        'value': musicbrainz_id
    }]

    return bigquery.run_query(bigquery_connection, query, parameters)
示例#5
0
def get_artist_count(bigquery_connection):
    """ Calculates the total number of artists submitted to ListenBrainz.

        Args:
            bigquery_connection: the bigquery connection object

        Returns:
            artist_count (int)
    """

    query = """SELECT COUNT(DISTINCT(artist_msid)) as artist_count
                 FROM {dataset_id}.{table_id}
            """.format(
                dataset_id=config.BIGQUERY_DATASET_ID,
                table_id=config.BIGQUERY_TABLE_ID,
            )

    return bigquery.run_query(bigquery_connection, query)[0]['artist_count']
示例#6
0
    def test_run_query_done(self, mock_current_app):
        """ Test run_query when the result is directly returned by the first api call to bigquery.jobs.query """

        mock_bigquery = MagicMock()
        # set the value returned by call to bigquery to a response which signifies completed query
        mock_bigquery.jobs.return_value.query.return_value.execute.return_value = bigquery_responses['done']

        # construct query and parameters
        query = """SELECT artist_msid, artist_name
                     FROM {dataset_id}.{table_id}
                    WHERE user_name = @username
                """.format(dataset_id=config.BIGQUERY_DATASET_ID, table_id=config.BIGQUERY_TABLE_ID)

        parameters = [{
            'name': 'username',
            'type': 'STRING',
            'value': 'testuser'
        }]

        result = bigquery.run_query(mock_bigquery, query, parameters)
        self.assertListEqual(result, expected_results['done'])
示例#7
0
def get_artist_count(bigquery_connection, musicbrainz_id, time_interval=None):
    """ Get artist count for user with given MusicBrainz ID over a particular period of time

        Args:
            bigquery_connection: the bigquery connection object
            musicbrainz_id (str): the MusicBrainz ID of the user
            time_interval  (str): the time interval over which artist count should be returned
                                    (defaults to all time)

        Returns: artist_count (int): total number of artists listened to by the user in that
                                     period of time
    """

    filter_clause = ""
    if time_interval:
        filter_clause = "AND listened_at >= TIMESTAMP_SUB(CURRENT_TIME(), INTERVAL {})".format(time_interval)

    query = """SELECT COUNT(DISTINCT(artist_msid)) as artist_count
                 FROM {dataset_id}.{table_id}
                WHERE user_name = @musicbrainz_id
                {time_filter_clause}
            """.format(
                    dataset_id=config.BIGQUERY_DATASET_ID,
                    table_id=config.BIGQUERY_TABLE_ID,
                    time_filter_clause=filter_clause,
                )

    parameters = [
        {
            'name': 'musicbrainz_id',
            'type': 'STRING',
            'value': musicbrainz_id,
        }
    ]

    return bigquery.run_query(bigquery_connection, query, parameters)[0]['artist_count']
示例#8
0
def get_artist_count(bigquery_connection, musicbrainz_id, time_interval=None):
    """ Get artist count for user with given MusicBrainz ID over a particular period of time

        Args:
            bigquery_connection: the bigquery connection object
            musicbrainz_id (str): the MusicBrainz ID of the user
            time_interval  (str): the time interval over which artist count should be returned
                                    (defaults to all time)

        Returns: artist_count (int): total number of artists listened to by the user in that
                                     period of time
    """

    filter_clause = ""
    if time_interval:
        filter_clause = "AND listened_at >= TIMESTAMP_SUB(CURRENT_TIME(), INTERVAL {})".format(
            time_interval)

    query = """SELECT COUNT(DISTINCT(artist_msid)) as artist_count
                 FROM {dataset_id}.{table_id}
                WHERE user_name = @musicbrainz_id
                {time_filter_clause}
            """.format(
        dataset_id=config.BIGQUERY_DATASET_ID,
        table_id=config.BIGQUERY_TABLE_ID,
        time_filter_clause=filter_clause,
    )

    parameters = [{
        'name': 'musicbrainz_id',
        'type': 'STRING',
        'value': musicbrainz_id,
    }]

    return bigquery.run_query(bigquery_connection, query,
                              parameters)[0]['artist_count']
示例#9
0
def get_top_recordings(bigquery_connection, musicbrainz_id, time_interval=None):
    """ Get top recordings of user with given MusicBrainz ID over a particular time interval

        Args:
            bigquery_connection: the bigquery connection object
            musicbrainz_id (str): The MusicBrainz ID of the user
            time_interval  (str): Interval in the BigQuery interval format
                                  which can be seen here:
                                  https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#timestamp_sub

        Returns:
            sorted list of the following format
            [
                {
                    'track_name' (str)
                    'recording_msid' (uuid)
                    'recording_mbid' (uuid)
                    'artist_name' (str)
                    'artist_msid' (uuid)
                    'artist_mbids' (string of comma-seperated uuids)
                    'listen_count' (int)
                }
            ]
    """

    filter_clause = ""
    if time_interval:
        filter_clause = "AND listened_at >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL {})".format(time_interval)

    query = """SELECT artist_name
                    , artist_msid
                    , artist_mbids
                    , recording_msid
                    , recording_mbid
                    , track_name
                    , COUNT(recording_msid) as listen_count
                 FROM {dataset_id}.{table_id}
                WHERE user_name = @musicbrainz_id
                {time_filter_clause}
             GROUP BY recording_msid, recording_mbid, track_name, artist_name, artist_msid, artist_mbids
             ORDER BY listen_count DESC
                LIMIT {limit}
            """.format(
                dataset_id=config.BIGQUERY_DATASET_ID,
                table_id=config.BIGQUERY_TABLE_ID,
                time_filter_clause=filter_clause,
                limit=config.STATS_ENTITY_LIMIT
            )

    # construct the parameters that must be passed to the Google BigQuery API

    # start with a list of parameters to pass and then convert it to standard format
    # required by Google BigQuery
    parameters = [
        {
            "type": "STRING",
            "name": "musicbrainz_id",
            "value": musicbrainz_id,
        },
    ]

    return bigquery.run_query(bigquery_connection, query, parameters)
示例#10
0
def get_top_recordings(bigquery_connection,
                       musicbrainz_id,
                       time_interval=None):
    """ Get top recordings of user with given MusicBrainz ID over a particular time interval

        Args:
            bigquery_connection: the bigquery connection object
            musicbrainz_id (str): The MusicBrainz ID of the user
            time_interval  (str): Interval in the BigQuery interval format
                                  which can be seen here:
                                  https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#timestamp_sub

        Returns:
            sorted list of the following format
            [
                {
                    'track_name' (str)
                    'recording_msid' (uuid)
                    'recording_mbid' (uuid)
                    'artist_name' (str)
                    'artist_msid' (uuid)
                    'artist_mbids' (string of comma-seperated uuids)
                    'listen_count' (int)
                }
            ]
    """

    filter_clause = ""
    if time_interval:
        filter_clause = "AND listened_at >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL {})".format(
            time_interval)

    query = """SELECT artist_name
                    , artist_msid
                    , artist_mbids
                    , recording_msid
                    , recording_mbid
                    , track_name
                    , COUNT(recording_msid) as listen_count
                 FROM {dataset_id}.{table_id}
                WHERE user_name = @musicbrainz_id
                {time_filter_clause}
             GROUP BY recording_msid, recording_mbid, track_name, artist_name, artist_msid, artist_mbids
             ORDER BY listen_count DESC
                LIMIT {limit}
            """.format(dataset_id=config.BIGQUERY_DATASET_ID,
                       table_id=config.BIGQUERY_TABLE_ID,
                       time_filter_clause=filter_clause,
                       limit=config.STATS_ENTITY_LIMIT)

    # construct the parameters that must be passed to the Google BigQuery API

    # start with a list of parameters to pass and then convert it to standard format
    # required by Google BigQuery
    parameters = [
        {
            "type": "STRING",
            "name": "musicbrainz_id",
            "value": musicbrainz_id,
        },
    ]

    return bigquery.run_query(bigquery_connection, query, parameters)