Пример #1
0
    def get_by_mbid(cls, mbid):
        """Return the artist for ``mbid``, importing it from MusicBrainz if missing.

        An artist already stored in the database is returned as-is. Otherwise
        the artist is fetched with ``mb.get_artist`` and saved, the first page
        of its release groups is stored immediately, and
        ``Job.add_release_groups`` is called when further pages may exist.

        Returns ``None`` when ``mb.get_artist`` returns ``None``.

        Raises:
            cls.Blacklisted: ``mbid`` is listed in ``cls.blacklisted``.
            cls.Unknown: ``mb.get_artist`` returned an empty (falsy but not
                ``None``) result.
        """
        if mbid in cls.blacklisted:
            raise cls.Blacklisted()

        try:
            return cls.objects.get(mbid=mbid)
        except cls.DoesNotExist:
            pass  # Not stored yet: import it below.

        fetched = mb.get_artist(mbid)
        if fetched is None:
            return None
        if not fetched:
            raise cls.Unknown

        artist = Artist(
            mbid=mbid,
            name=fetched['name'],
            sort_name=fetched['sort-name'],
            disambiguation=fetched.get('disambiguation', ''),
        )
        try:
            artist.save()
        except IntegrityError:
            # Somebody else stored the same artist while MB was being queried.
            return cls.objects.get(mbid=mbid)

        # Store the first page of release groups right away.
        # Pause for one second first to comply with the MB web service.
        sleep(1)
        page_size = 100
        first_page = mb.get_release_groups(mbid, limit=page_size, offset=0)
        if first_page:
            with transaction.commit_on_success():
                for entry in first_page:
                    # Entries lacking a release date or a type are skipped.
                    if not entry.get('first-release-date') or not entry.get('type'):
                        continue
                    ReleaseGroup(
                        artist=artist,
                        mbid=entry['id'],
                        name=entry['title'],
                        type=entry['type'],
                        date=str_to_date(entry['first-release-date']),
                        is_deleted=False,
                    ).save()

        # A failed fetch (None) or a completely full page means more may follow.
        more_expected = first_page is None or len(first_page) == page_size
        if more_expected:
            Job.add_release_groups(artist)

        return artist
Пример #2
0
def check():
    """Synchronise every stored artist and its release groups with MusicBrainz.

    Artists are visited one at a time in ascending ``mbid`` order. For each
    artist:

    * When ``DEBUG`` is set or today is the 1st of the month (UTC), the artist
      record is refreshed from ``mb.get_artist``. If MusicBrainz answers with
      a different mbid the artist is treated as merged: its
      ``app_userartist`` rows are moved to the new artist, its release groups
      are flagged as deleted, and the loop moves on to the next artist. The
      merge is skipped when the new artist is blacklisted, unknown or cannot
      be fetched.
    * Its release groups are fetched page by page (100 per request) and
      compared with the stored ones. New groups are created and a
      notification row is inserted for every ``app_userartist`` row of the
      artist; changed groups are updated; groups without a release date or
      type, and groups MusicBrainz no longer returns, are flagged as deleted.

    A failed release-group fetch (``None``) is retried indefinitely.

    Returns nothing; the totals are written to the log.
    """
    logging.info('Start checking artists')
    checked_artists = 0
    checked_release_groups = 0
    # The day is sampled once, so a run that crosses midnight keeps one mode.
    day = datetime.datetime.utcnow().day
    artist = None
    while True:

        # Get the next artist. A fresh query per iteration (instead of one
        # long-lived queryset) picks up artists added or removed meanwhile.
        artists = Artist.objects.order_by('mbid')
        if artist:
            artists = artists.filter(mbid__gt=artist.mbid)
        try:
            artist = artists[0]
        except IndexError:
            break # last artist

        checked_artists += 1

        # Artist names don't change that often. Update artists at most once
        # a month, unless we are debugging.
        if DEBUG or day == 1:
            # NOTE(review): jobs.process() and tools.sleep() presumably run
            # pending jobs and throttle MB requests -- confirm in their modules.
            jobs.process()
            tools.sleep()
            logging.info('Updating artist %s' % artist.mbid)
            artist_data = mb.get_artist(artist.mbid)
            if not artist_data:
                # TODO: musicbrainz/network error or deleted?
                # Execution falls through to the release group check below.
                logging.warning('Could not fetch artist data')
            elif artist_data['id'] != artist.mbid:
                # Requested and returned mbids are different if the artist has been merged.
                logging.info('Merging into artist %s' % artist_data['id'])
                try:
                    new_artist = Artist.get_by_mbid(artist_data['id'])
                except (Artist.Blacklisted, Artist.Unknown):
                    continue
                if not new_artist:
                    continue
                # Move the user/artist links to the merge target. OR REPLACE
                # resolves conflicts with rows the target artist already has.
                cursor = connection.cursor()
                cursor.execute(
                    """
                    UPDATE OR REPLACE "app_userartist"
                    SET "artist_id" = %s
                    WHERE "artist_id" = %s
                    """, [new_artist.id, artist.id])
                # Mark release groups as deleted.
                n = artist.releasegroup_set.update(is_deleted=True)
                logging.info('Deleted %s release groups' % n)
                continue
            else:
                # Update artist info if changed.
                updated = False
                if artist.name != artist_data['name']:
                    artist.name = artist_data['name']
                    updated = True
                if artist.sort_name != artist_data['sort-name']:
                    artist.sort_name = artist_data['sort-name']
                    updated = True
                if artist.disambiguation != artist_data.get('disambiguation', ''):
                    artist.disambiguation = artist_data.get('disambiguation', '')
                    updated = True
                if updated:
                    logging.info('Artist changed, updating')
                    artist.save()
        else:
            logging.info('Checking artist %s' % artist.mbid)

        # Stored release groups by mbid. Entries are removed as MusicBrainz
        # confirms them; whatever is left afterwards no longer exists upstream.
        current = {rg.mbid: rg for rg in ReleaseGroup.objects.filter(artist=artist)}

        # Get release groups
        LIMIT = 100
        offset = 0
        while True:
            jobs.process()
            tools.sleep()
            release_groups = mb.get_release_groups(artist.mbid, LIMIT, offset)
            if release_groups is None:
                # Same offset is requested again; there is no retry limit.
                logging.warning('Could not fetch release groups, retrying')
                continue
            logging.info('Fetched %s release groups' % len(release_groups))
            with transaction.commit_on_success():
                for rg_data in release_groups:
                    mbid = rg_data['id']
                    # Ignore releases without a release date or a type.
                    if not rg_data.get('first-release-date') or not rg_data.get('type'):
                        if mbid in current:
                            release_group = current[mbid]
                            if not release_group.is_deleted:
                                release_group.is_deleted = True
                                release_group.save()
                                logging.info('Deleted release group %s' % mbid)
                        continue

                    checked_release_groups += 1
                    release_date = str_to_date(rg_data['first-release-date'])
                    if mbid in current:
                        release_group = current[mbid]

                        updated = False
                        if release_group.is_deleted:
                            release_group.is_deleted = False
                            updated = True
                        # Work-around MBS-4285.
                        if release_group.name != rg_data['title'] and rg_data['title']:
                            release_group.name = rg_data['title']
                            updated = True
                        if release_group.type != rg_data['type']:
                            release_group.type = rg_data['type']
                            updated = True
                        if release_group.date != release_date:
                            release_group.date = release_date
                            updated = True
                        if updated:
                            release_group.save()
                            logging.info('Updated release group %s' % mbid)

                        del current[mbid]
                    elif rg_data['title']:
                        # Work-around MBS-4285 on the create path as well: an
                        # empty title is not trusted by the update path above,
                        # so do not create (and notify about) a nameless
                        # release group either. It is picked up by a later run
                        # once a title is returned.
                        release_group = ReleaseGroup(
                            artist=artist,
                            mbid=rg_data['id'],
                            name=rg_data['title'],
                            type=rg_data['type'],
                            date=release_date,
                            is_deleted=False)
                        release_group.save()
                        logging.info('Created release group %s' % mbid)

                        # Notify users: one notification row per user/artist
                        # link of the artist owning the new release group.
                        cursor = connection.cursor()
                        cursor.execute(
                            """
                            INSERT INTO "app_notification" ("user_id", "release_group_id")
                            SELECT "app_userartist"."user_id", "app_releasegroup"."id"
                            FROM "app_userartist"
                            JOIN "app_artist" ON "app_artist"."id" = "app_userartist"."artist_id"
                            JOIN "app_releasegroup" ON "app_releasegroup"."artist_id" = "app_artist"."id"
                            WHERE "app_releasegroup"."id" = %s
                            """, [release_group.id])
                        logging.info('Notified %d users' % cursor.rowcount)

            # A short page is the last page.
            if len(release_groups) < LIMIT: break
            offset += LIMIT

        # Anything still in `current` was not returned by MusicBrainz.
        with transaction.commit_on_success():
            for mbid in current:
                release_group = current[mbid]
                if not release_group.is_deleted:
                    release_group.is_deleted = True
                    release_group.save()
                    logging.info('Deleted release group %s' % mbid)

    logging.info('Checked %d artists and %d release groups' % (checked_artists, checked_release_groups))
Пример #3
0
def check():
    """Synchronise every stored artist and its release groups with MusicBrainz.

    Artists are visited one at a time in ascending ``mbid`` order. For each
    artist:

    * When ``DEBUG`` is set or today is the 1st, 11th or 21st of the month
      (UTC), the artist record is refreshed from ``mb.get_artist``. If
      MusicBrainz answers with a different mbid the artist is treated as
      merged: its ``app_userartist`` rows are moved to the new artist and the
      old artist and its release groups are deleted, all in one transaction.
      The merge is skipped when the new artist is blacklisted, unknown or
      cannot be fetched.
    * Its release groups are fetched page by page (100 per request) and
      compared with the stored ones. New groups with a title are created and
      a notification row is inserted for every ``app_userartist`` row of the
      artist; changed groups are updated; groups without a usable release
      date or type, and groups MusicBrainz no longer returns, are flagged as
      deleted.

    A failed release-group fetch (``None``) is retried indefinitely.

    Returns:
        A ``(checked_artists, checked_release_groups, sent_notifications)``
        tuple, where ``sent_notifications`` is the sum of the values returned
        by ``notifications.send()``.
    """
    logging.info('Start checking artists')
    sent_notifications = 0
    checked_artists = 0
    checked_release_groups = 0
    # The day is sampled once, so a run that crosses midnight keeps one mode.
    day = datetime.datetime.utcnow().day
    artist = None
    while True:

        # Get the next artist. A fresh query per iteration (instead of one
        # long-lived queryset) picks up artists added or removed meanwhile.
        artists = Artist.objects.order_by('mbid')
        if artist:
            artists = artists.filter(mbid__gt=artist.mbid)
        try:
            artist = artists[0]
        except IndexError:
            break # last artist

        checked_artists += 1

        # Artist names don't change that often. Update artists at most 3 times
        # a month, unless we are debugging.
        if DEBUG or day in (1, 11, 21):
            # NOTE(review): jobs.process() and tools.sleep() presumably run
            # pending jobs and throttle MB requests -- confirm in their modules.
            jobs.process()
            tools.sleep()
            logging.info('Updating artist %s' % artist.mbid)
            artist_data = mb.get_artist(artist.mbid)
            if not artist_data:
                # TODO: musicbrainz/network error or deleted?
                # Execution falls through to the release group check below.
                logging.warning('Could not fetch artist data')
            elif artist_data['id'] != artist.mbid:
                # Requested and returned mbids are different if the artist has been merged.
                logging.info('Merging into artist %s' % artist_data['id'])
                try:
                    new_artist = Artist.get_by_mbid(artist_data['id'])
                except (Artist.Blacklisted, Artist.Unknown):
                    continue
                if not new_artist:
                    continue
                # The three statements below depend on each other. Run them in
                # one transaction so that a failure part-way (see the TODO
                # about app_star) rolls everything back instead of leaving a
                # half-merged artist behind.
                with transaction.commit_on_success():
                    cursor = connection.cursor()
                    # Move the user/artist links to the merge target. OR REPLACE
                    # resolves conflicts with rows the target already has.
                    cursor.execute("""
                    UPDATE OR REPLACE "app_userartist"
                    SET "artist_id" = %s
                    WHERE "artist_id" = %s
                    """, [new_artist.id, artist.id])
                    # Delete the artist and its release groups.
                    # Use SQL, delete() is buggy, see Django bug #16426.
                    # TODO: possible FK constraint fail in app_star.
                    cursor.execute("""
                    DELETE FROM "app_releasegroup"
                    WHERE "artist_id" = %s
                    """, [artist.id])
                    logging.info('Deleted release groups')
                    cursor.execute("""
                    DELETE FROM "app_artist"
                    WHERE "id" = %s
                    """, [artist.id])
                    logging.info('Deleted the artist')
                continue
            else:
                # Update artist info if changed.
                updated = False
                if artist.name != artist_data['name']:
                    artist.name = artist_data['name']
                    updated = True
                if artist.sort_name != artist_data['sort-name']:
                    artist.sort_name = artist_data['sort-name']
                    updated = True
                if artist.disambiguation != artist_data.get('disambiguation', ''):
                    artist.disambiguation = artist_data.get('disambiguation', '')
                    updated = True
                if updated:
                    logging.info('Artist changed, updating')
                    artist.save()
        else:
            logging.info('Checking artist %s' % artist.mbid)

        # Stored release groups by mbid. Entries are removed as MusicBrainz
        # confirms them; whatever is left afterwards no longer exists upstream.
        current = {rg.mbid: rg for rg in ReleaseGroup.objects.filter(artist=artist)}

        # Get release groups
        LIMIT = 100
        offset = 0
        while True:
            # NOTE(review): notifications.send() is assumed to return a count
            # of sent notifications -- confirm in the notifications module.
            sent_notifications += notifications.send()
            release_groups = mb.get_release_groups(artist.mbid, LIMIT, offset)
            if release_groups is None:
                # Same offset is requested again; there is no retry limit.
                logging.warning('Could not fetch release groups, retrying')
                continue
            logging.info('Fetched %s release groups' % len(release_groups))
            with transaction.commit_on_success():
                for rg_data in release_groups:
                    mbid = rg_data['id']
                    # Ignore releases without a release date or a type.
                    release_date = str_to_date(rg_data.get('first-release-date'))
                    if not release_date or not rg_data.get('type'):
                        if mbid in current:
                            release_group = current[mbid]
                            if not release_group.is_deleted:
                                release_group.is_deleted = True
                                release_group.save()
                                logging.info('Deleted release group %s' % mbid)
                        continue

                    checked_release_groups += 1
                    if mbid in current:
                        release_group = current[mbid]

                        updated = False
                        if release_group.is_deleted:
                            release_group.is_deleted = False
                            updated = True
                        # Work-around MBS-4285.
                        if release_group.name != rg_data['title'] and rg_data['title']:
                            release_group.name = rg_data['title']
                            updated = True
                        if release_group.type != rg_data['type']:
                            release_group.type = rg_data['type']
                            updated = True
                        if release_group.date != release_date:
                            release_group.date = release_date
                            updated = True
                        if updated:
                            release_group.save()
                            logging.info('Updated release group %s' % mbid)

                        del current[mbid]
                    elif rg_data['title']:
                        # Only groups with a non-empty title are created.
                        release_group = ReleaseGroup(
                            artist=artist,
                            mbid=rg_data['id'],
                            name=rg_data['title'],
                            type=rg_data['type'],
                            date=release_date,
                            is_deleted=False)
                        release_group.save()
                        logging.info('Created release group %s' % mbid)

                        # Notify users: one notification row per user/artist
                        # link of the artist owning the new release group.
                        cursor = connection.cursor()
                        cursor.execute(
                            """
                            INSERT INTO "app_notification" ("user_id", "release_group_id")
                            SELECT "app_userartist"."user_id", "app_releasegroup"."id"
                            FROM "app_userartist"
                            JOIN "app_artist" ON "app_artist"."id" = "app_userartist"."artist_id"
                            JOIN "app_releasegroup" ON "app_releasegroup"."artist_id" = "app_artist"."id"
                            WHERE "app_releasegroup"."id" = %s
                            """, [release_group.id])
                        logging.info('Will notify %d users' % cursor.rowcount)

            # A short page is the last page.
            if len(release_groups) < LIMIT: break
            offset += LIMIT

        # Anything still in `current` was not returned by MusicBrainz.
        with transaction.commit_on_success():
            for mbid in current:
                release_group = current[mbid]
                if not release_group.is_deleted:
                    release_group.is_deleted = True
                    release_group.save()
                    logging.info('Deleted release group %s' % mbid)

    return (checked_artists, checked_release_groups, sent_notifications)