def get_cover(mbid):
    """Find and store cover art for the release group `mbid`.

    Checks the MusicBrainz release pages of the ten earliest-dated
    releases first, then falls back to Last.fm cover URLs.
    """
    logging.info('[JOB] Trying to find a cover for %s' % mbid)
    tools.sleep()
    logging.info('[JOB] Get releases')
    releases = mb.get_releases(mbid, limit=100, offset=0)
    if releases is None:
        logging.warning('[ERR] Could not get releases, skipping')
        return
    releases = [r for r in releases if r.get('date')]

    # Order releases by date.
    # Pad partial dates so that '2011' sorts as '2011-99-99' and
    # '2011-01' as '2011-01-99' — exactly the ordering the old cmp-based
    # comparator produced.  A key function replaces sorted(cmp=...),
    # which is Python-2-only and less efficient (cmp is called O(n log n)
    # times, the key once per element).
    def date_key(release):
        date = release['date']
        while len(date) < 10:
            date += '-99'
        return date

    releases = sorted(releases, key=date_key)

    # We don't want to check all 100 releases.
    releases = [r['id'] for r in releases][:10]

    url = None
    for release in releases:
        tools.sleep()
        logging.info('[JOB] Checking release %s' % release)
        try:
            request = Request('http://musicbrainz.org/release/' + release,
                              headers={'User-Agent': 'muspy/2.0'})
            response = urlopen(request)
            html = response.read()
        # Was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit; catch ordinary errors only.
        except Exception:
            logging.warning('[ERR] Could not fetch the release page, skipping')
            continue

        # Parsing the release page
        pattern = r'<div class="cover-art">\s*<img src="(?P<url>[^"]+)"'
        match = re.search(pattern, html)
        if not match:
            logging.info('[JOB] No cover art, skipping')
            continue
        url = match.group('url')
        if _fetch_cover(mbid, url):
            return

    logging.info('[JOB] Try to get cover from Last.fm')
    for rg in ReleaseGroup.objects.filter(mbid=mbid).select_related('artist'):
        urls = lastfm.get_cover_urls(rg.artist.name, rg.name) or []
        for url in urls:
            if _fetch_cover(mbid, url):
                return

    logging.warning('[ERR] Could not find a cover')
def get_cover(mbid):
    """Find and store cover art for the release group `mbid`.

    Checks the MusicBrainz release pages of the ten earliest-dated
    releases first, then falls back to Last.fm cover URLs.
    """
    logging.info('[JOB] Trying to find a cover for %s' % mbid)
    tools.sleep()
    logging.info('[JOB] Get releases')
    releases = mb.get_releases(mbid, limit=100, offset=0)
    if releases is None:
        logging.warning('[ERR] Could not get releases, skipping')
        return
    releases = [r for r in releases if r.get('date')]

    # Order releases by date.
    # Pad partial dates so that '2011' sorts as '2011-99-99' and
    # '2011-01' as '2011-01-99' — same ordering as the old cmp-based
    # comparator, but a key function works on both Python 2 and 3
    # (sorted(cmp=...) was removed in Python 3).
    def date_key(release):
        date = release['date']
        while len(date) < 10:
            date += '-99'
        return date

    releases = sorted(releases, key=date_key)

    # We don't want to check all 100 releases.
    releases = [r['id'] for r in releases][:10]

    url = None
    for release in releases:
        tools.sleep()
        logging.info('[JOB] Checking release %s' % release)
        try:
            request = Request(
                'http://musicbrainz.org/release/' + release,
                headers={'User-Agent': 'muspy/2.0'})
            response = urlopen(request)
            html = response.read()
        # Was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit; catch ordinary errors only.
        except Exception:
            logging.warning('[ERR] Could not fetch the release page, skipping')
            continue

        # Parsing the release page
        pattern = r'<div class="cover-art">\s*<img src="(?P<url>[^"]+)"'
        match = re.search(pattern, html)
        if not match:
            logging.info('[JOB] No cover art, skipping')
            continue
        url = match.group('url')
        if _fetch_cover(mbid, url):
            return

    logging.info('[JOB] Try to get cover from Last.fm')
    for rg in ReleaseGroup.objects.filter(mbid=mbid).select_related('artist'):
        urls = lastfm.get_cover_urls(rg.artist.name, rg.name) or []
        for url in urls:
            if _fetch_cover(mbid, url):
                return

    logging.warning('[ERR] Could not find a cover')
def import_lastfm(user, username, count, period):
    """Import up to `count` top artists from Last.fm for `user`.

    `username` is the Last.fm account name and `period` a Last.fm
    time-period string.  Pages through the top-artists API 50 at a
    time; artists with a MusicBrainz id are added directly, others are
    resolved by a name search via add_artist().
    """
    logging.info('[JOB] Importing %d artists from Last.fm for user %s' % (count, username))
    LIMIT = 50  # Last.fm page size.
    page, added = 0, 0
    while True:
        page += 1
        tools.sleep()
        logging.info('[JOB] Getting page %d' % page)
        artists = lastfm.get_artists(username, period, LIMIT, page)
        if artists is None:
            # Transient Last.fm failure: undo the increment and retry
            # the same page.
            logging.warning('[ERR] Last.fm error, retrying')
            page -= 1
            continue
        if not artists:
            # Ran out of artists before reaching `count`.
            break
        for artist_data in artists:
            mbid = artist_data.get('mbid', '')
            if mbid:
                # Retry loop: keeps asking MB until the artist is
                # fetched or is known to be blacklisted/unknown.
                while True:
                    # Artist.get_by_mbid will query MB if the artist is not yet
                    # in the database. Query first to avoid unnecessary sleep.
                    if not Artist.objects.filter(mbid=mbid).exists():
                        tools.sleep()
                    logging.info('[JOB] Getting artist %s' % mbid)
                    try:
                        artist = Artist.get_by_mbid(mbid)
                    except Artist.Blacklisted:
                        logging.info('[JOB] Blacklisted artist, skipping')
                        break
                    except Artist.Unknown:
                        logging.info('[JOB] Unknown artist, skipping')
                        break
                    if not artist:
                        logging.warning(
                            '[ERR] Cannot get the artist data, retrying')
                        continue
                    UserArtist.add(user, artist)
                    break
            else:
                add_artist(user, artist_data['name'])
            # NOTE: skipped (blacklisted/unknown) artists still count
            # toward `added`.
            added += 1
            if added == count:
                break
        if added == count:
            break
def import_lastfm(user, username, count, period):
    """Import up to `count` top artists from Last.fm for `user`.

    `username` is the Last.fm account name and `period` a Last.fm
    time-period string.  Pages through the top-artists API 50 at a
    time; artists with a MusicBrainz id are added directly, others are
    resolved by a name search via add_artist().
    """
    logging.info('[JOB] Importing %d artists from Last.fm for user %s' % (count, username))
    LIMIT = 50  # Last.fm page size.
    page, added = 0, 0
    while True:
        page += 1
        tools.sleep()
        logging.info('[JOB] Getting page %d' % page)
        artists = lastfm.get_artists(username, period, LIMIT, page)
        if artists is None:
            # Transient Last.fm failure: undo the increment and retry
            # the same page.
            logging.warning('[ERR] Last.fm error, retrying')
            page -= 1
            continue
        if not artists:
            # Ran out of artists before reaching `count`.
            break
        for artist_data in artists:
            mbid = artist_data.get('mbid', '')
            if mbid:
                # Retry loop: keeps asking MB until the artist is
                # fetched or is known to be blacklisted/unknown.
                while True:
                    # Artist.get_by_mbid will query MB if the artist is not yet
                    # in the database. Query first to avoid unnecessary sleep.
                    if not Artist.objects.filter(mbid=mbid).exists():
                        tools.sleep()
                    logging.info('[JOB] Getting artist %s' % mbid)
                    try:
                        artist = Artist.get_by_mbid(mbid)
                    except Artist.Blacklisted:
                        logging.info('[JOB] Blacklisted artist, skipping')
                        break
                    except Artist.Unknown:
                        logging.info('[JOB] Unknown artist, skipping')
                        break
                    if not artist:
                        logging.warning('[ERR] Cannot get the artist data, retrying')
                        continue
                    UserArtist.add(user, artist)
                    break
            else:
                add_artist(user, artist_data['name'])
            # NOTE: skipped (blacklisted/unknown) artists still count
            # toward `added`.
            added += 1
            if added == count:
                break
        if added == count:
            break
def add_release_groups(mbid):
    """Fetch and store all release groups for the artist `mbid`.

    Pages through MusicBrainz 100 release groups at a time, creating
    ReleaseGroup rows for dated, typed groups not already present.
    Returns True when the job is finished (including unrecoverable
    errors); MusicBrainz failures are retried indefinitely.
    """
    logging.info('[JOB] Fetching release groups for artist %s' % mbid)
    try:
        artist = Artist.objects.get(mbid=mbid)
    except Artist.DoesNotExist:
        # BUG FIX: the original format string had no %s placeholder,
        # so the % operator raised TypeError instead of logging.
        logging.warning('[ERR] Cannot find artist by mbid %s, skipping' % mbid)
        return True
    LIMIT = 100
    offset = 0
    while True:
        tools.sleep()
        logging.info('[JOB] Fetching release groups at offset %d' % offset)
        release_groups = mb.get_release_groups(mbid, limit=LIMIT, offset=offset)
        if release_groups:
            with transaction.commit_on_success():
                for rg_data in release_groups:
                    # Ignoring releases without a release date or a type.
                    if rg_data.get('first-release-date') and rg_data.get('type'):
                        q = ReleaseGroup.objects.filter(
                            artist=artist, mbid=rg_data['id'])
                        if q.exists():
                            continue
                        release_group = ReleaseGroup(
                            artist=artist,
                            mbid=rg_data['id'],
                            name=rg_data['title'],
                            type=rg_data['type'],
                            date=str_to_date(rg_data['first-release-date']),
                            is_deleted=False)
                        release_group.save()
        if release_groups is None:
            # MB error: retry the same offset.
            logging.warning('[ERR] MusicBrainz error, retrying')
            continue
        if len(release_groups) < LIMIT:
            break  # last page
        offset += LIMIT
    return True
def send():
    """Send one email per pending Notification.

    Returns the number of emails successfully sent.  Failed sends are
    retried (the notification is only deleted after handling).
    """
    sent_emails = 0
    sleep = True
    while True:
        if sleep:
            # Interleave job processing and rate-limit before the next send.
            jobs.process()
            tools.sleep()
            sleep = False
        try:
            notification = Notification.objects.all()[0]
        except IndexError:
            break # last one
        with transaction.commit_on_success():
            user = notification.user
            profile = user.get_profile()
            if profile.notify and profile.email_activated:
                types = profile.get_types()
                rg = notification.release_group
                if rg.type in types and is_recent(rg.date):
                    sleep = True
                    # Reuse the profile fetched above instead of a second
                    # user.get_profile() database round-trip.
                    result = profile.send_email(
                        subject='[muspy] New Release: %s - %s' % (
                            rg.artist.name, rg.name),
                        text_template='email/release.txt',
                        html_template='email/release.html',
                        release=rg,
                        username=user.username,
                        root='https://muspy.com/')
                    if not result:
                        # Keep the notification and retry on the next pass.
                        logging.warning('Could not send to user %d, retrying' % user.id)
                        continue
                    sent_emails += 1
                    logging.info('Sent a notification to user %d' % user.id)
            notification.delete()
    return sent_emails
def send():
    """Batch-send new-release notifications, one email per user.

    Groups each user's pending release groups into a single email and
    clears them afterwards.  Returns the number of emails sent.
    """
    sent_emails = 0
    sleep = True
    while True:
        if sleep:
            jobs.process()
            tools.sleep()
            sleep = False
        try:
            # Ordering by user lets all of a user's notifications be
            # handled (and cleared) together.
            notification = Notification.objects.order_by('-user')[0]
        except IndexError:
            break # last one
        with transaction.commit_on_success():
            user = notification.user
            profile = user.get_profile()
            if profile.notify and profile.email_activated:
                types = profile.get_types()
                release_groups = user.new_release_groups.select_related('artist').all()
                release_groups = [
                    rg for rg in release_groups
                    if rg.type in types and is_recent(rg.date)]
                if release_groups:
                    sleep = True
                    # Reuse the profile fetched above instead of a second
                    # user.get_profile() database round-trip.
                    result = profile.send_email(
                        subject='[muspy] New Release Notification',
                        text_template='email/release.txt',
                        html_template='email/release.html',
                        releases=release_groups,
                        root='http://muspy.com/')
                    if not result:
                        # Keep the notifications and retry on the next pass.
                        logging.warning('Could not send to user %d, retrying' % user.id)
                        continue
                    sent_emails += 1
                    logging.info('Sent a notification to user %d' % user.id)
            user.new_release_groups.clear()
    logging.info('Sent %d email notifications' % sent_emails)
    # BUG FIX: callers accumulate this value
    # (sent_notifications += notifications.send() in check()),
    # but the function previously returned None.
    return sent_emails
def add_release_groups(mbid):
    """Fetch and store all release groups for the artist `mbid`.

    Pages through MusicBrainz 100 release groups at a time, creating
    ReleaseGroup rows for dated, typed groups not already present.
    Returns True when the job is finished (including unrecoverable
    errors); MusicBrainz failures are retried indefinitely.
    """
    logging.info('[JOB] Fetching release groups for artist %s' % mbid)
    try:
        artist = Artist.objects.get(mbid=mbid)
    except Artist.DoesNotExist:
        # BUG FIX: the original format string had no %s placeholder,
        # so the % operator raised TypeError instead of logging.
        logging.warning('[ERR] Cannot find artist by mbid %s, skipping' % mbid)
        return True
    LIMIT = 100
    offset = 0
    while True:
        tools.sleep()
        logging.info('[JOB] Fetching release groups at offset %d' % offset)
        release_groups = mb.get_release_groups(mbid, limit=LIMIT, offset=offset)
        if release_groups:
            with transaction.commit_on_success():
                for rg_data in release_groups:
                    # Ignoring releases without a release date or a type.
                    if rg_data.get('first-release-date') and rg_data.get('type'):
                        q = ReleaseGroup.objects.filter(
                            artist=artist, mbid=rg_data['id'])
                        if q.exists():
                            continue
                        release_group = ReleaseGroup(
                            artist=artist,
                            mbid=rg_data['id'],
                            name=rg_data['title'],
                            type=rg_data['type'],
                            date=str_to_date(rg_data['first-release-date']),
                            is_deleted=False)
                        release_group.save()
        if release_groups is None:
            # MB error: retry the same offset.
            logging.warning('[ERR] MusicBrainz error, retrying')
            continue
        if len(release_groups) < LIMIT:
            break  # last page
        offset += LIMIT
    return True
def add_artist(user, search):
    """Resolve a free-text artist search and subscribe `user` to it.

    Returns True when the job is finished (success, skip, or saved for
    later resolution) and False when it should be retried.
    """
    tools.sleep()
    logging.info('[JOB] Searching for artist [%s] for user %d' % (search, user.id))
    found_artists, count = mb.search_artists(search, limit=2, offset=0)
    if found_artists is None:
        logging.warning('[ERR] MusicBrainz error while searching, skipping')
        return True

    # Accept the match when it is the only hit, or when the first of
    # several hits — and only the first — matches the search exactly.
    lowered = search.lower()
    unambiguous = len(found_artists) == 1 or (
        len(found_artists) > 1
        and found_artists[0]['name'].lower() == lowered
        and found_artists[1]['name'].lower() != lowered)

    if not unambiguous:
        logging.info('[JOB] Could not identify artist by name, saving for later')
        UserSearch(user=user, search=search).save()
        return True

    mbid = found_artists[0]['id']
    # get_by_mbid() queries MB, must sleep.
    tools.sleep()
    logging.info('[JOB] Adding artist %s' % mbid)
    try:
        artist = Artist.get_by_mbid(mbid)
    except Artist.Blacklisted:
        logging.warning('[ERR] Artist %s is blacklisted, skipping' % mbid)
        return True
    except Artist.Unknown:
        logging.warning('[ERR] Artist %s is unknown, skipping' % mbid)
        return True
    if not artist:
        logging.warning('[ERR] Could not fetch artist %s, retrying' % mbid)
        return False
    UserArtist.add(user, artist)
    return True
def process():
    """Work on pending jobs.

    Drains the Job queue oldest-first.  A job whose handler reports
    failure is left in the queue and retried after a sleep.
    """
    while True:
        # Oldest pending job first; stop when the queue is empty.
        try:
            job = Job.objects.select_related('user').order_by('id')[0]
        except IndexError:
            break

        done = True
        if job.type == Job.ADD_ARTIST:
            done = add_artist(job.user, job.data)
        elif job.type == Job.ADD_RELEASE_GROUPS:
            done = add_release_groups(job.data)
        elif job.type == Job.GET_COVER:
            get_cover(job.data)
        elif job.type == Job.IMPORT_LASTFM:
            # data is "<count>,<period>,<username>"; username may
            # itself contain commas, hence maxsplit=2.
            count, period, username = job.data.split(',', 2)
            import_lastfm(job.user, username, int(count), period)

        if not done:
            # Handler asked for a retry: keep the job, back off first.
            tools.sleep()
            continue
        job.delete()
def add_artist(user, search):
    """Resolve a free-text artist search and subscribe `user` to it.

    Returns True when the job is finished (success, skip, or saved for
    later resolution) and False when it should be retried.
    """
    tools.sleep()
    logging.info('[JOB] Searching for artist [%s] for user %d' % (search, user.id))
    found_artists, count = mb.search_artists(search, limit=2, offset=0)
    if found_artists is None:
        logging.warning('[ERR] MusicBrainz error while searching, skipping')
        return True

    # Accept the match when it is the only hit, or when the first of
    # several hits — and only the first — matches the search exactly.
    needle = search.lower()
    is_match = len(found_artists) == 1 or (
        len(found_artists) > 1
        and found_artists[0]['name'].lower() == needle
        and found_artists[1]['name'].lower() != needle)

    if not is_match:
        logging.info('[JOB] Could not identify artist by name, saving for later')
        UserSearch(user=user, search=search).save()
        return True

    mbid = found_artists[0]['id']
    # get_by_mbid() queries MB, must sleep.
    tools.sleep()
    logging.info('[JOB] Adding artist %s' % mbid)
    try:
        artist = Artist.get_by_mbid(mbid)
    except Artist.Blacklisted:
        logging.warning('[ERR] Artist %s is blacklisted, skipping' % mbid)
        return True
    except Artist.Unknown:
        logging.warning('[ERR] Artist %s is unknown, skipping' % mbid)
        return True
    if not artist:
        logging.warning('[ERR] Could not fetch artist %s, retrying' % mbid)
        return False
    UserArtist.add(user, artist)
    return True
def check():
    """Check every artist for metadata changes and new release groups.

    Walks all artists ordered by mbid.  Once a month (or always under
    DEBUG) artist metadata is refreshed from MusicBrainz, handling
    merged artists.  Release groups are then synchronized: new ones are
    created (and Notification rows inserted for subscribed users),
    changed ones updated, and ones gone upstream marked deleted.
    """
    logging.info('Start checking artists')
    checked_artists = 0
    checked_release_groups = 0
    day = datetime.datetime.utcnow().day
    artist = None
    while True:
        # Get the next artist.
        artists = Artist.objects.order_by('mbid')
        if artist:
            artists = artists.filter(mbid__gt=artist.mbid)
        try:
            artist = artists[0]
        except IndexError:
            break # last artist
        checked_artists += 1

        # Artist names don't change that often. Update artists at most once
        # a month, unless we are debugging.
        if DEBUG or day == 1:
            jobs.process()
            tools.sleep()
            logging.info('Updating artist %s' % artist.mbid)
            artist_data = mb.get_artist(artist.mbid)
            if not artist_data:
                # TODO: musicbrainz/network error or deleted?
                logging.warning('Could not fetch artist data')
            elif artist_data['id'] != artist.mbid:
                # Requested and returned mbids are different if the artist has been merged.
                logging.info('Merging into artist %s' % artist_data['id'])
                try:
                    new_artist = Artist.get_by_mbid(artist_data['id'])
                except (Artist.Blacklisted, Artist.Unknown):
                    continue
                if not new_artist:
                    continue
                # Repoint subscriptions to the merge target; OR REPLACE
                # drops rows that would duplicate an existing subscription.
                cursor = connection.cursor()
                cursor.execute(
                    """
                    UPDATE OR REPLACE "app_userartist"
                    SET "artist_id" = %s
                    WHERE "artist_id" = %s
                    """, [new_artist.id, artist.id])
                # Mark release groups as deleted.
                n = artist.releasegroup_set.update(is_deleted=True)
                logging.info('Deleted %s release groups' % n)
                continue
            else:
                # Update artist info if changed.
                updated = False
                if artist.name != artist_data['name']:
                    artist.name = artist_data['name']
                    updated = True
                if artist.sort_name != artist_data['sort-name']:
                    artist.sort_name = artist_data['sort-name']
                    updated = True
                if artist.disambiguation != artist_data.get('disambiguation', ''):
                    artist.disambiguation = artist_data.get('disambiguation', '')
                    updated = True
                if updated:
                    logging.info('Artist changed, updating')
                    artist.save()

        logging.info('Checking artist %s' % artist.mbid)
        # Existing release groups keyed by mbid; entries still present
        # after the sync loop are the ones gone upstream.
        current = {rg.mbid: rg for rg in ReleaseGroup.objects.filter(artist=artist)}

        # Get release groups
        LIMIT = 100
        offset = 0
        while True:
            jobs.process()
            tools.sleep()
            release_groups = mb.get_release_groups(artist.mbid, LIMIT, offset)
            if release_groups is None:
                # MB error: retry the same offset.
                logging.warning('Could not fetch release groups, retrying')
                continue
            logging.info('Fetched %s release groups' % len(release_groups))
            with transaction.commit_on_success():
                for rg_data in release_groups:
                    mbid = rg_data['id']
                    # Ignore releases without a release date or a type.
                    if not rg_data.get('first-release-date') or not rg_data.get('type'):
                        if mbid in current:
                            release_group = current[mbid]
                            if not release_group.is_deleted:
                                release_group.is_deleted = True
                                release_group.save()
                                logging.info('Deleted release group %s' % mbid)
                        continue
                    checked_release_groups += 1
                    release_date = str_to_date(rg_data['first-release-date'])
                    if mbid in current:
                        # Known release group: update any changed field.
                        release_group = current[mbid]
                        updated = False
                        if release_group.is_deleted:
                            release_group.is_deleted = False
                            updated = True
                        # Work-around MBS-4285.
                        if release_group.name != rg_data['title'] and rg_data['title']:
                            release_group.name = rg_data['title']
                            updated = True
                        if release_group.type != rg_data['type']:
                            release_group.type = rg_data['type']
                            updated = True
                        if release_group.date != release_date:
                            release_group.date = release_date
                            updated = True
                        if updated:
                            release_group.save()
                            logging.info('Updated release group %s' % mbid)
                        del current[mbid]
                    else:
                        release_group = ReleaseGroup(
                            artist=artist,
                            mbid=rg_data['id'],
                            name=rg_data['title'],
                            type=rg_data['type'],
                            date=release_date,
                            is_deleted=False)
                        release_group.save()
                        logging.info('Created release group %s' % mbid)

                        # Notify users
                        cursor = connection.cursor()
                        cursor.execute(
                            """
                            INSERT INTO "app_notification" ("user_id", "release_group_id")
                            SELECT "app_userartist"."user_id", "app_releasegroup"."id"
                            FROM "app_userartist"
                            JOIN "app_artist" ON "app_artist"."id" = "app_userartist"."artist_id"
                            JOIN "app_releasegroup" ON "app_releasegroup"."artist_id" = "app_artist"."id"
                            WHERE "app_releasegroup"."id" = %s
                            """, [release_group.id])
                        logging.info('Notified %d users' % cursor.rowcount)
            if len(release_groups) < LIMIT:
                break
            offset += LIMIT

        # Whatever is left in `current` was not seen upstream: mark deleted.
        with transaction.commit_on_success():
            for mbid in current:
                release_group = current[mbid]
                if not release_group.is_deleted:
                    release_group.is_deleted = True
                    release_group.save()
                    logging.info('Deleted release group %s' % mbid)

    logging.info('Checked %d artists and %d release groups' % (checked_artists, checked_release_groups))
def check():
    """Check every artist for metadata changes and new release groups.

    Walks all artists ordered by mbid.  Three times a month (or always
    under DEBUG) artist metadata is refreshed from MusicBrainz,
    handling merged artists by repointing subscriptions and deleting
    the stale artist.  Release groups are then synchronized with MB.
    Returns (checked_artists, checked_release_groups, sent_notifications).
    """
    logging.info('Start checking artists')
    sent_notifications = 0
    checked_artists = 0
    checked_release_groups = 0
    day = datetime.datetime.utcnow().day
    artist = None
    while True:
        # Get the next artist.
        artists = Artist.objects.order_by('mbid')
        if artist:
            artists = artists.filter(mbid__gt=artist.mbid)
        try:
            artist = artists[0]
        except IndexError:
            break # last artist
        checked_artists += 1

        # Artist names don't change that often. Update artists at most 3 times
        # a month, unless we are debugging.
        if DEBUG or day in (1, 11, 21):
            jobs.process()
            tools.sleep()
            logging.info('Updating artist %s' % artist.mbid)
            artist_data = mb.get_artist(artist.mbid)
            if not artist_data:
                # TODO: musicbrainz/network error or deleted?
                logging.warning('Could not fetch artist data')
            elif artist_data['id'] != artist.mbid:
                # Requested and returned mbids are different if the artist has been merged.
                logging.info('Merging into artist %s' % artist_data['id'])
                try:
                    new_artist = Artist.get_by_mbid(artist_data['id'])
                except (Artist.Blacklisted, Artist.Unknown):
                    continue
                if not new_artist:
                    continue
                # Repoint subscriptions to the merge target; OR REPLACE
                # drops rows that would duplicate an existing subscription.
                cursor = connection.cursor()
                cursor.execute(
                    """
                    UPDATE OR REPLACE "app_userartist"
                    SET "artist_id" = %s
                    WHERE "artist_id" = %s
                    """, [new_artist.id, artist.id])
                # Delete the artist and its release groups.
                # Use SQL, delete() is buggy, see Django bug #16426.
                # TODO: possible FK constraint fail in app_star.
                cursor.execute(
                    """
                    DELETE FROM "app_releasegroup"
                    WHERE "artist_id" = %s
                    """, [artist.id])
                logging.info('Deleted release groups')
                cursor.execute(
                    """
                    DELETE FROM "app_artist"
                    WHERE "id" = %s
                    """, [artist.id])
                logging.info('Deleted the artist')
                continue
            else:
                # Update artist info if changed.
                updated = False
                if artist.name != artist_data['name']:
                    artist.name = artist_data['name']
                    updated = True
                if artist.sort_name != artist_data['sort-name']:
                    artist.sort_name = artist_data['sort-name']
                    updated = True
                if artist.disambiguation != artist_data.get('disambiguation', ''):
                    artist.disambiguation = artist_data.get('disambiguation', '')
                    updated = True
                if updated:
                    logging.info('Artist changed, updating')
                    artist.save()

        logging.info('Checking artist %s' % artist.mbid)
        # Existing release groups keyed by mbid; entries still present
        # after the sync loop are the ones gone upstream.
        current = {rg.mbid: rg for rg in ReleaseGroup.objects.filter(artist=artist)}

        # Get release groups
        LIMIT = 100
        offset = 0
        while True:
            # Flush pending email notifications between MB requests.
            sent_notifications += notifications.send()
            release_groups = mb.get_release_groups(artist.mbid, LIMIT, offset)
            if release_groups is None:
                # MB error: retry the same offset.
                logging.warning('Could not fetch release groups, retrying')
                continue
            logging.info('Fetched %s release groups' % len(release_groups))
            with transaction.commit_on_success():
                for rg_data in release_groups:
                    mbid = rg_data['id']
                    # Ignore releases without a release date or a type.
                    release_date = str_to_date(rg_data.get('first-release-date'))
                    if not release_date or not rg_data.get('type'):
                        if mbid in current:
                            release_group = current[mbid]
                            if not release_group.is_deleted:
                                release_group.is_deleted = True
                                release_group.save()
                                logging.info('Deleted release group %s' % mbid)
                        continue
                    checked_release_groups += 1
                    if mbid in current:
                        # Known release group: update any changed field.
                        release_group = current[mbid]
                        updated = False
                        if release_group.is_deleted:
                            release_group.is_deleted = False
                            updated = True
                        # Work-around MBS-4285.
                        if release_group.name != rg_data['title'] and rg_data['title']:
                            release_group.name = rg_data['title']
                            updated = True
                        if release_group.type != rg_data['type']:
                            release_group.type = rg_data['type']
                            updated = True
                        if release_group.date != release_date:
                            release_group.date = release_date
                            updated = True
                        if updated:
                            release_group.save()
                            logging.info('Updated release group %s' % mbid)
                        del current[mbid]
                    elif rg_data['title']:
                        # New release group (only when it has a title).
                        release_group = ReleaseGroup(
                            artist=artist,
                            mbid=rg_data['id'],
                            name=rg_data['title'],
                            type=rg_data['type'],
                            date=release_date,
                            is_deleted=False)
                        release_group.save()
                        logging.info('Created release group %s' % mbid)

                        # Notify users
                        cursor = connection.cursor()
                        cursor.execute(
                            """
                            INSERT INTO "app_notification" ("user_id", "release_group_id")
                            SELECT "app_userartist"."user_id", "app_releasegroup"."id"
                            FROM "app_userartist"
                            JOIN "app_artist" ON "app_artist"."id" = "app_userartist"."artist_id"
                            JOIN "app_releasegroup" ON "app_releasegroup"."artist_id" = "app_artist"."id"
                            WHERE "app_releasegroup"."id" = %s
                            """, [release_group.id])
                        logging.info('Will notify %d users' % cursor.rowcount)
            if len(release_groups) < LIMIT:
                break
            offset += LIMIT

        # Whatever is left in `current` was not seen upstream: mark deleted.
        with transaction.commit_on_success():
            for mbid in current:
                release_group = current[mbid]
                if not release_group.is_deleted:
                    release_group.is_deleted = True
                    release_group.save()
                    logging.info('Deleted release group %s' % mbid)

    return (checked_artists, checked_release_groups, sent_notifications)