Example #1
def get_abstract_from_bbc(artist):
    """
    Populate CachedArtist with short Wikipedia abstract from BBC API.

    BBC provides abstracts only for artists, so we skip the lookup if the argument is a release.

    @param artist: a CachedArtist or CachedReleaseGroup object

    @return: a dictionary with an abstract structure
    """
    abstract = {}
    if artist._doc_type == 'CachedArtist' and 'bbc' not in artist.cache_state:
        try:
            t = mmda_logger('bbc','request','abstract',artist.get_id)
            xml = urlopen("http://www.bbc.co.uk/music/artists/%s/wikipedia.xml" % artist.get_id, timeout=ABSTRACT_TIMEOUT).read()
            xmlSoup = BeautifulStoneSoup(xml, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
            abstract = {
                    'content':xmlSoup.wikipedia_article.content.text,
                    'url':xmlSoup.wikipedia_article.url.text,
                    'lang':'en',
                    'provider':'Wikipedia'
                    }
        except Exception, e:
            mmda_logger('bbc','ERROR',e)
            cache_state = 0
        else:
            mmda_logger('bbc','result','found',abstract['url'],t)
            cache_state = 1
        artist.cache_state['bbc'] = [cache_state,datetime.utcnow()]
        artist.changes_present = True
    return abstract
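
A hypothetical usage sketch (the mbid is a placeholder; get_basic_artist is defined in Example #5, and the function above is assumed to return the abstract dict its docstring promises):

    artist = get_basic_artist('some-artist-mbid')   # placeholder mbid
    abstract = get_abstract_from_bbc(artist)
    if abstract:
        print abstract['provider'], abstract['url']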
Example #2
File: news.py Project: lidel/mmda
def _get_myspace_id(profile_url):
    """
    Return myspace user id.

    Be smart and stop the download as soon as the ID is found.

    @param profile_url: a string with a Myspace profile URL

    @return: a string with user id
    """
    BUFFERSIZE = 2048
    id = None
    re_myspaceid = re.compile(r"blogs.myspace.com/index.cfm\?fuseaction=blog.ListAll&friendId=(?P<friend_id>\d+)")

    t = mmda_logger('mspc','request','find ID',profile_url)
    try:
        usock = HTTP_OPENER.open(profile_url, timeout=SEARCH_TIMEOUT)
        tail = ''
        while 1:
            buffer = usock.read(BUFFERSIZE)
            # search with a small overlap, so an ID spanning two buffers is not missed
            r = re_myspaceid.search(tail + buffer)
            if r and r.groups():
                id = r.groups()[0]
                break
            if len(buffer) < BUFFERSIZE: break
            tail = buffer[-100:]
        usock.close()
    except Exception, e:
        mmda_logger('myspace','ERROR',e)

    mmda_logger('mspc','result','myspace ID',id,t)
    return id
Example #3
File: news.py Project: lidel/mmda
def _get_news_sources(artist):
    """
    Find RSS/Atom feeds available for an artist.

    @param artist: a CachedArtist object

    @return: a list of strings with URLs
    """
    sources = []
    future_calls = []

    if 'Myspace' in artist.urls:

        if 'myspace_id' not in artist:
            myspace_profile = artist.urls['Myspace'][0]
            myspace_id = _get_myspace_id(myspace_profile)
            artist.myspace_id = myspace_id
            artist.changes_present = True

        if 'myspace_id' in artist and artist.myspace_id:
            myspace_blog_feed = "http://blogs.myspace.com/Modules/BlogV2/Pages/RssFeed.aspx?friendID=%s" % artist.myspace_id
            sources.append(myspace_blog_feed)

    t = mmda_logger('www','request','find feeds',artist.name)

    for source_type in LOOK_FOR_FEEDS:
        if source_type in artist.urls:
            # extend, not overwrite -- several source types may carry feeds
            future_calls.extend(Future(_get_feed_link_for,url) for url in artist.urls[source_type])

    feed_urls = [future_call() for future_call in future_calls]
    sources.extend(list(set(url for url in feed_urls if url)))

    mmda_logger('www','result','found feeds',len(sources),t)

    return [(src,None,None) for src in sources]
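
Future is the project's helper for running a call in the background (it also shows up in Examples #6, #16 and #17); its implementation is not part of these snippets. Judging from the usage above -- Future(fn, *args) starts the work, and calling the resulting object blocks until the result is ready -- a minimal sketch could look like this (an assumption, not the project's actual code):

    import threading

    class Future(object):
        """Run fn(*args, **kwargs) in a background thread; call the object to join and get the result."""
        def __init__(self, fn, *args, **kwargs):
            self._result = None
            self._thread = threading.Thread(target=self._run, args=(fn, args, kwargs))
            self._thread.start()

        def _run(self, fn, args, kwargs):
            try:
                self._result = fn(*args, **kwargs)
            except Exception:
                self._result = None   # swallow errors so `if future()` checks stay simple

        def __call__(self):
            self._thread.join()
            return self._result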
Example #4
def save_any_changes(self):
    """
    Store the document in the database if it is marked as 'changes_present'.
    """
    if 'changes_present' in self:
        del self.changes_present
        self.save()
        mmda_logger('db','store',self)
Example #5
def get_basic_artist(mbid):
    """
    Make sure basic artist document is present and contains required data.

    @param mbid:    a string containing a MusicBrainz ID of an artist

    @return: a CachedArtist object containing required minimal data set
    """
    #TODO: handle Various Artists' artist (VARIOUS_ARTISTS_ID)
    try:
        artist = CachedArtist.get(mbid)
        mmda_logger('db','present',artist._doc_type, artist.get_id)
    except ResourceNotFound:
        artist = _create_mb_artist(mbid)
    return artist
Example #6
def populate_release_lastfm(release_group, release_mbid):
    """
    Make sure all required and available last.fm data is present in a CachedReleaseGroup document.

    @param release_group: a CachedReleaseGroup object
    @param release_mbid:  a string containing a MusicBrainz ID of a release

    @return: a validated/updated CachedReleaseGroup object
    """
    release = release_group.releases[release_mbid]
    if 'lastfm' not in release_group.cache_state:
        lastfm = pylast.get_lastfm_network(api_key = settings.LASTFM_API_KEY)
        lastfm.enable_caching()
        try:
            t = mmda_logger('last','request','release-data',release_mbid)

            lastfm_album = lastfm.get_album_by_mbid(release_mbid)

            lastfm_abstract = None
            lastfm_cover    = None
            lastfm_url      = lastfm_album.get_url()

            if 'abstract' not in release_group:
                lastfm_abstract = Future(lastfm_album.get_wiki_summary)
            if 'cover' not in release:
                lastfm_cover = lastfm_album.get_cover_image()
            # wait for Future
            if 'abstract' not in release_group:
                lastfm_abstract()

            mmda_logger('last','result','release-data',release_mbid,t)
        except Exception, e:
            mmda_logger('pylast','ERROR',e)
        else:
            if 'urls' not in release:
                release['urls'] = {}
            release['urls']['Last.fm'] = [lastfm_url]

            if lastfm_abstract and lastfm_abstract():
                release_group.abstract = {'content':strip_tags(lastfm_abstract()), 'lang':'en', 'provider':'Last.fm', 'url':lastfm_url}

            if lastfm_cover:
                release['cover'] = lastfm_cover

        # TODO: when to save? when failed do we retry?
        release_group.cache_state['lastfm']    = [1,datetime.utcnow()]
        release_group.changes_present = True
Example #7
def get_basic_cached_search_result(query_type, query_string):
    """
    Make sure proper CachedSearchResult is present and return its id.

    Performs a local lookup first, then an optional remote (MusicBrainz) lookup of the query result.

    @param query_type: a string containing query type
    @param query_string: a string containing query

    @return: a string containing SHA1 hash of a query string (the ID of a CachedSearchResult document)
    """
    query_id        = hashlib.sha1((query_type+query_string).encode('utf-8')).hexdigest()
    search_result   = CachedSearchResult.get_or_create(query_id)
    search_result.query_string  = query_string
    search_result.query_type    = query_type
    if 'mb' not in search_result.cache_state: #TODO: add 14day window check

        try:
            t = mmda_logger('mb','request','search for',query_string)

            if query_type == 'artist':
                mb_filter = ws.ArtistFilter(name=query_string,limit=RESULTS_LIMIT)
                results = mb_query.getArtists(mb_filter) #TODO: add try, or maybe better in 'create_search' as a global wrapper
                search_result.results = [ {'name':r.artist.name, 'mbid':extractUuid(r.artist.id), 'score':r.score, 'note':r.artist.disambiguation } for r in results ]

            elif query_type == 'release':
                mb_filter = ws.ReleaseFilter(title=query_string,limit=RESULTS_LIMIT)
                results = mb_query.getReleases(mb_filter) #TODO: add try, or maybe better in 'create_search' as a global wrapper
                search_result.results = [ {'artist':r.release.artist.name, 'title':r.release.title, 'mbid':extractUuid(r.release.id), 'artist_mbid':extractUuid(r.release.artist.id), 'score':r.score, 'tracks_count':r.release.tracksCount, 'year':r.release.getEarliestReleaseEvent().getDate() if r.release.getEarliestReleaseEvent() else None} for r in results ]

            elif query_type == 'tag':
                # TODO: refactor to other packages
                import pylast
                lastfm = pylast.get_lastfm_network(api_key = settings.LASTFM_API_KEY)
                lastfm_similar_tags = lastfm.search_for_tag(query_string).get_next_page()
                search_result.results = [ tag.name for tag in lastfm_similar_tags ]

        except Exception, e:
            # TODO: hard error here
            mmda_logger('search','ERROR',e)
            raise e
        else:
            mmda_logger('mb','result','results',len(search_result.results),t)
            search_result.cache_state['mb'] = [1,datetime.utcnow()]
            search_result.save()
    return query_id
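
Throughout these examples, cache_state maps a provider key to a [status, timestamp] pair (0 for a failed fetch, 1 or 2 for successful ones). The "14day window check" mentioned in the TODO above is not implemented in any of these snippets; a hypothetical freshness check built on that convention might look like:

    from datetime import datetime, timedelta

    def cache_is_fresh(doc, provider, max_age_days=14):
        """Return True if `provider` was fetched successfully and is still fresh."""
        if provider not in doc.cache_state:
            return False
        status, fetched_at = doc.cache_state[provider]
        return status > 0 and datetime.utcnow() - fetched_at < timedelta(days=max_age_days)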
Example #8
def _create_mb_artist(mbid):
    """
    Fetch basic metadata and store it as a CachedArtist document.

    @param mbid: a string containing a MusicBrainz ID of an artist

    @return: a CachedArtist object with basic MusicBrainz data
    """
    try:
        t = mmda_logger('mb','request','artist',mbid)
        mb_artist = mb_query.getArtistById(mbid, MB_ARTIST_INCLUDES)
        mmda_logger('mb','result', 'artist',mb_artist.name,t)
    except WebServiceError, e:
        # TODO: hard error page here
        # TODO: 404 not found redirect to different page? conditional?
        # TODO:  HTTP Error 503: Service Temporarily Unavailable -> special case:  please wait few seconds and hit F5
        mmda_logger('mb-artist','ERROR',e)
        raise e
    else:
        artist                      = CachedArtist.get_or_create(mbid)
        artist                      = _populate_artist_mb(artist, mb_artist)
        artist.cache_state['mb']    = [1,datetime.utcnow()]
        artist.save()
        mmda_logger('db','store',artist)

        # since we have some basic release data fetched with the artist, store it too
        _create_shallow_releases_mb(mb_artist)

        # TODO: think about genres and origin - fetch from freebase(?)
        # freebase.mqlread({"type":"/music/artist", "limit": 1, "key": [{"namespace" : '/authority/musicbrainz',"value" : '579ef111-19dd-4ae8-ad50-d5fa435472b9'}], "genre":[], "origin":None} )
    return artist
Example #9
def get_abstract_from_dbpedia(artist_or_releasegroup):
    """
    Populate a CachedArtist or CachedReleaseGroup with a short abstract.

    @param artist_or_releasegroup: a CachedArtist or CachedReleaseGroup object

    @return: a dictionary with an abstract structure
    """
    abstract = {}
    # if artist_or_releasegroup is a ReleaseGroup, we look for a release with a Wikipedia URL
    # TODO: check performance, and if better - replace in other parts
    # TODO: DRY: refactor
    if 'dbpedia' not in artist_or_releasegroup.cache_state:
        wiki_resource = None
        cache_state = 0

        if 'releases' in artist_or_releasegroup:
            for release in artist_or_releasegroup['releases'].itervalues():
                if 'urls' in release  and 'Wikipedia' in release['urls']:
                    wiki_resource, wiki_lang, wiki_url = find_best_wikipedia_resource(release['urls']['Wikipedia'])
        elif 'urls' in artist_or_releasegroup and 'Wikipedia' in artist_or_releasegroup['urls']:
            wiki_resource, wiki_lang, wiki_url = find_best_wikipedia_resource(artist_or_releasegroup['urls']['Wikipedia'])

        if wiki_resource:
            store = surf.Store(reader = "sparql_protocol", endpoint = "http://dbpedia.org/sparql")
            session = surf.Session(store)
            sparql_query = "SELECT ?abstract WHERE {{ <http://dbpedia.org/resource/%s> <http://dbpedia.org/property/abstract> ?abstract FILTER langMatches( lang(?abstract), '%s') } }" % (wiki_resource, wiki_lang)
            try:
                t = mmda_logger('wiki','request','abstract',wiki_resource)
                # TODO: timeout?
                sparql_result = session.default_store.execute_sparql(sparql_query) # TODO: error handling
                mmda_logger('wiki','result','found',len(sparql_result['results']['bindings']),t)
                if sparql_result['results']['bindings'][0]['abstract']:
                    abstract = {'content':unicode(sparql_result['results']['bindings'][0]['abstract']), 'url':wiki_url, 'lang':wiki_lang, 'provider':'Wikipedia'}
                    # TODO: add cache_status dbpedia
            except Exception, e:
                # TODO: handle it?
                mmda_logger('surf-dbpedia','ERROR',e)
            else:
                cache_state = 1

        artist_or_releasegroup.cache_state['dbpedia'] = [cache_state,datetime.utcnow()]
        artist_or_releasegroup.changes_present = True
    return abstract
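
find_best_wikipedia_resource is not among these examples. Judging from how its (resource, lang, url) result feeds the SPARQL query above, a rough sketch of the assumed behavior (preferring English articles) could be:

    import urlparse

    def find_best_wikipedia_resource(wikipedia_urls):
        """Pick a Wikipedia URL and split it into (resource, lang, url); an assumption, not the project's code."""
        best = (None, None, None)
        for url in wikipedia_urls:
            lang = urlparse.urlsplit(url).netloc.split('.')[0]   # e.g. 'en' from en.wikipedia.org
            resource = url.rsplit('/', 1)[-1]                    # e.g. 'Radiohead'
            best = (resource, lang, url)
            if lang == 'en':   # assumption: an English article wins
                break
        return best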
Example #10
def populate_artist_pictures_flickr(artist_pictures):
    """
    Make sure desired flickr pictures are present in a CachedArtistPictures document.

    @param artist_pictures: a CachedArtistPictures object

    @return: a validated/updated CachedArtistPictures object
    """
    # TODO: cache flickr only for a day?
    if 'flickr' not in artist_pictures.cache_state:
        flickr = flickrapi.FlickrAPI(settings.FLICKR_API_KEY, cache=True)
        flickr.cache = cache

        # TODO: use artist aliases  as alternative tags? (what about tag_mode?)
        artist_tags = artist_pictures.artist_name
        if 'artist_aliases' in artist_pictures:
            artist_tags += ',' + ','.join(artist_pictures.artist_aliases)

        includes = 'owner_name, url_sq, url_o'
        licenses = '1,2,3,4,5,6,7' # http://www.flickr.com/services/api/flickr.photos.licenses.getInfo.html

        data_walker = flickr.walk(tag_mode='any',tags=artist_tags.lower(),media='photos',license=licenses,extras=includes,per_page=FLICKR_LIMIT)
        # TODO: make two walks. first one with events from lastfm, second if result list is less than FLICKR_LIMIT

        flickr_photos = []
        try:
            t = mmda_logger('flkr','request','artist pictures',artist_pictures._id)
            for i in xrange(FLICKR_LIMIT):
                # the walker raises StopIteration when fewer than FLICKR_LIMIT
                # photos exist; the broad except below absorbs that as well
                f_photo = data_walker.next()
                photo = {
                        'owner_id':f_photo.get('owner'),
                        'id':f_photo.get('id'),
                        'title':f_photo.get('title'),
                        'sq':f_photo.get('url_sq'),
                        'big':f_photo.get('url_o'),
                        'owner':f_photo.get('ownername')
                        }
                flickr_photos.append(photo)
            mmda_logger('flkr','result','found pictures',len(flickr_photos), t)
        except Exception, e:
            mmda_logger('flickrapi','ERROR',e)
        if flickr_photos:
            artist_pictures.flickr = flickr_photos
            artist_pictures.cache_state['flickr'] = [2,datetime.utcnow()]
            artist_pictures.changes_present = True
Example #11
def get_basic_release(mbid):
    """
    Make sure release and its dependencies are present and contain required data.

    @param mbid: a string containing a MusicBrainz ID of a release

    @return:  a CachedReleaseGroup object containing required minimal data set
    """
    release_group   = CachedReleaseGroup.view('artists/releases',include_docs=True, key=mbid).one()
    if not release_group:
        # TODO: optimize? it's just one additional request on rare occasions, though
        try:
            t = mmda_logger('mb','request','artist mbid of release',mbid)
            mb_release  = mb_query.getReleaseById(mbid, MB_RELEASE_ARTIST)
            artist_mbid = extractUuid(mb_release.artist.id)
            mmda_logger('mb','result','artist mbid',artist_mbid,t)
        except WebServiceError, e:
            # TODO: add error handling here
            mmda_logger('mb-release','ERROR',e)
            raise e
        else:
            get_basic_artist(artist_mbid)
            release_group = CachedReleaseGroup.view('artists/releases',include_docs=True, key=mbid).one()
    else:
        mmda_logger('db','present',release_group._doc_type, release_group.get_id)
    return release_group
Example #12
def populate_tag_lastfm(tag):
    """
    Make sure all available last.fm data is present in a CachedTag document.

    @param tag: a CachedTag object

    @return: a validated/updated CachedTag object
    """
    # TODO: when expire?
    if 'lastfm' not in tag.cache_state:
        lastfm = pylast.get_lastfm_network(api_key = settings.LASTFM_API_KEY)
        lastfm.enable_caching()
        try:
            t = mmda_logger('last','request','tag-artists',tag.get_id)
            lastfm_tag = lastfm.get_tag(tag.get_id.replace('-',' ')) # TODO: this is an ugly fix, make it pretty
            lastfm_artists = _lastfm_get_tag_artists_optimized(lastfm_tag)
            mmda_logger('last','result','found',len(lastfm_artists),t)
        except Exception, e:
            mmda_logger('pylast','ERROR',e)
        else:
            if lastfm_artists:
                tag.artists = lastfm_artists
            tag.cache_state['lastfm'] = [1,datetime.utcnow()]
            tag.changes_present = True
Example #13
def populate_artist_pictures_lastfm(artist_pictures):
    """
    Make sure all available last.fm data is present in a CachedArtistPictures document.

    @param artist_pictures: a CachedArtistPictures object

    @return: a validated/updated CachedArtistPictures object
    """
    if 'lastfm' not in artist_pictures.cache_state or artist_pictures.cache_state['lastfm'][0] == 1:
        lastfm = pylast.get_lastfm_network(api_key = settings.LASTFM_API_KEY)
        lastfm.enable_caching()
        try:
            t = mmda_logger('last','request','artist pictures',artist_pictures._id)
            lastfm_artist = lastfm.get_artist_by_mbid(artist_pictures._id)
            lastfm_images = lastfm_artist.get_images(order=pylast.IMAGES_ORDER_POPULARITY,limit=LASTFM_PICTURE_LIMIT)
            # TODO: add lastfm event info, that can be used as a tag in flickr search
            mmda_logger('last','result','found pictures',len(lastfm_images),t)
        except Exception, e:
            mmda_logger('pylast','ERROR',e)
        else:
            if lastfm_images:
                artist_pictures.lastfm = [ {'sq':i.sizes.largesquare, 'big':i.sizes.original, 'url':i.url,'title':i.title} for i in lastfm_images]
            artist_pictures.cache_state['lastfm'] = [2,datetime.utcnow()]
            artist_pictures.changes_present = True
Example #14
def get_basic_artist_videos(mbid):
    """
    Make sure document and its dependencies are present and contain required data.

    @param mbid: a string containing a MusicBrainz ID of an artist

    @return:  a CachedArtistVideos object containing minimal data set
    """
    try:
        artist_videos = CachedArtistVideos.get(mbid)
        if 'artist_name' not in artist_videos:
            raise ResourceNotFound
    except ResourceNotFound:
        # this adds overhead, but in most cases the artist page
        # is where the user will go next anyway
        artist = get_basic_artist(mbid)
        # TODO: just an idea: create a view that store only names and aliases?
        artist_videos = CachedArtistVideos.get_or_create(mbid)
        artist_videos.artist_name = artist.name
        if 'aliases' in artist:
            artist_videos.artist_aliases = list(artist.aliases)
        artist_videos.save()
        mmda_logger('db','store', artist_videos)
    return artist_videos
Example #15
def populate_artist_videos_youtube(artist_videos):
    """
    Make sure all available YouTube metadata is present in a CachedArtistVideos document.

    @param artist_videos: a CachedArtistVideos object

    @return: a validated/updated CachedArtistVideos object
    """
    # TODO: expire in one week?
    if "youtube" not in artist_videos.cache_state:

        youtube_videos = []
        yt_service = yts.YouTubeService()
        artist = get_basic_artist(artist_videos._id)

        try:
            t = mmda_logger("yt", "request", "artist-videos", artist_videos.artist_name)

            # check if the artist has a dedicated YouTube channel
            if "urls" in artist and "Youtube" in artist.urls:
                artist_videos.youtube_channel = artist.urls["Youtube"][0]
                youtube_id = _get_youtube_id(artist)

                feed = yt_service.GetYouTubeVideoFeed(
                    "http://gdata.youtube.com/feeds/api/users/%s/uploads" % youtube_id
                )

            # if there is no official channel, make a search query
            else:
                query = yts.YouTubeVideoQuery()

                query.orderby = "relevance"
                query.racy = "exclude"
                query.max_results = YOUTUBE_MAX_RESULTS
                query.categories.append("Music")

                # 'bug' workaround (http://bugs.python.org/issue1712522)
                query.vq = artist_videos.artist_name.encode("utf-8", "/")

                # TODO: aliases? at the moment they seem to lower result quality
                # query.vq = u"Múm OR Múm OR mum OR múm".encode('utf-8', '/')

                feed = yt_service.YouTubeQuery(query)

        except Exception, e:
            mmda_logger("yt-search", "ERROR", e)
            # raise Http500
        else:
            mmda_logger("yt", "result", "artist-videos", len(feed.entry), t)

            for entry in feed.entry:
                try:
                    video = {
                        "title": entry.media.title.text,
                        "duration": entry.media.duration.seconds,
                        "url": entry.media.player.url,
                        "player": entry.GetSwfUrl(),
                        "thumb": entry.media.thumbnail[0].url,
                    }
                # sometimes the fetched entries are malformed -- we reject them,
                # e.g. when an official channel contains videos blocked in some regions
                # example: http://www.youtube.com/user/dreamtheater
                # TODO: what if there is no videos left?
                #       example: http://127.0.0.1:8000/artist/the-beatles/videos/b10bbbfc-cf9e-42e0-be17-e2c3e1d2600d/
                except (NameError, AttributeError):
                    continue
                else:
                    youtube_videos.append(video)

            if youtube_videos:
                artist_videos.youtube = youtube_videos
            artist_videos.cache_state["youtube"] = [1, datetime.utcnow()]
            artist_videos.changes_present = True
            mmda_logger("db", "store", artist_videos)
Example #16
File: news.py Project: lidel/mmda
def populate_artist_news(news, artist):
    """
    Make sure cached news feeds for an artist are present and up to date.

    @param news: a cached news document (feed sources keyed by URL)
    @param artist: a CachedArtist object
    """

    # TODO: add condition that search for new feeds each 7 days?
    if news.sources:
        mmda_logger('news','present','cached',len(news.sources.keys()))
        """
            Sad story about ETag and Last-Modified Headers:

            SOME SITES JUST DON'T GIVE A DAMN.

            The End.

            Example:
                myspace feeds have no etag or L-M headers (17/Apr/2010)

            That is why MMDA uses 'cache' field to limit number
            of HTTP requests to such handicapped feeds separately

            reference: http://www.feedparser.org/docs/http-etag.html
        """
        now = datetime.utcnow()
        pending_sources = []

        for source_url in news.sources.keys():
            source          = news.sources[source_url]
            etag            = source['etag']                    if source.has_key('etag')     else None
            last_modified   = source['modified'].utctimetuple() if source.has_key('modified') else None
            feed_is_smart   = etag or last_modified
            cache_time      = (now - source['cache']).seconds

            if (feed_is_smart and cache_time > SMART_FEED_CACHE) or cache_time > FEED_CACHE:
                pending_sources.append((source_url, etag, last_modified))
    else:
        # if there are no cached feeds
        pending_sources = _get_news_sources(artist)

    if pending_sources:

        try:
            t = mmda_logger('news','request','to check',len(pending_sources))
            future_calls = [Future(_get_fetched_and_parsed_feed,source) for source in pending_sources]
            fetched_feeds = [future_obj() for future_obj in future_calls if timeout(future_obj,t=20)]

        except Exception, e:
            mmda_logger('feed-fetch','ERROR',e)

        else:
            for feed in fetched_feeds:
                try:

                    if feed.status == 304:
                        mmda_logger('news','present','no updates',feed.href)
                        news.sources[feed.href]['cache'] = datetime.utcnow()
                        news.changes_present = True
                        continue
                    elif feed.status == 404:
                        mmda_logger('news','present','404',feed.href)
                        del news.sources[feed.href]
                        news.changes_present = True
                        continue   # source is gone -- don't try to parse or re-add it
                    else:
                        feed_src = urlparse.urlsplit(feed.feed.link).netloc.replace('www.','')
                        mmda_logger('news','present','fetched',feed_src)

                    feed_entries = [{
                        'title':    e.title.strip() if e.has_key('title') else None,
                        'summary':  e.summary if e.has_key('summary') else None,
                        'date':     datetime(*e.updated_parsed[0:6]),
                        'url':      e.link
                        } for e in feed.entries]

                    news_source = {
                            'url':  feed.feed.link,
                            'name': feed_src,
                            'items':feed_entries,
                            'cache':datetime.utcnow()
                            }
                    if feed.has_key('modified') and feed.modified:
                        news_source['modified'] = datetime(*feed.modified[0:6])
                    if feed.has_key('etag') and feed.etag:
                        news_source['etag'] = feed.etag

                except Exception, e:
                    # some feeds may be badly formatted, sometimes FeedFinder may fail
                    # some myspace feeds have no entries...  trying
                    # to predict all possible failures is pointless.
                    # in such cases mmda just jumps to the next feed
                    mmda_logger('feed-parse','ERROR',e)
                    continue
                else:
                    news.sources[feed.href] = news_source
                    news.changes_present = True

            mmda_logger('news','result','got results',len(pending_sources),t)
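
For reference, a minimal sketch of the conditional-GET mechanism the comment above describes, using feedparser's etag/modified support (the feed URL is a placeholder; a well-behaved server answers the second request with 304):

    import feedparser

    d = feedparser.parse('http://example.com/atom.xml')
    d2 = feedparser.parse('http://example.com/atom.xml',
                          etag=getattr(d, 'etag', None),
                          modified=getattr(d, 'modified', None))
    if getattr(d2, 'status', None) == 304:
        print 'no updates since the last fetch'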
Example #17
def populate_artist_lastfm(artist):
    """
    Make sure all required and available last.fm data is present in a CachedArtist document.

    @param artist: a CachedArtist object

    @return: a validated/updated CachedArtist object
    """
    if 'lastfm' not in artist.cache_state:
        lastfm = pylast.get_lastfm_network(api_key = settings.LASTFM_API_KEY)
        lastfm.enable_caching()
        try:
            t = mmda_logger('last','request','artist-data',artist._id)
            lastfm_artist = lastfm.get_artist_by_mbid(artist._id)
            # TODO: run there in parallel (?)
            lastfm_images = Future(lastfm_artist.get_images,pylast.IMAGES_ORDER_POPULARITY,5)
            lastfm_url    = Future(lastfm_artist.get_url)
            # we get similar artists from lastfm database, but only those with mbid (omg, omg)
            # TODO: think about numbers of fetched things
            lastfm_similar  = Future(_lastfm_get_similar_optimized,lastfm_artist,10)
            lastfm_tags     = Future(lastfm_artist.get_top_tags,10)
            lastfm_abstract = None
            if 'abstract' not in artist:
                lastfm_abstract = lastfm_artist.get_bio_summary()

            # wait for all Future to come ;-)
            lastfm_url()
            lastfm_tags()
            lastfm_images()
            lastfm_similar()

            mmda_logger('last','result','artist-data',artist._id,t)
        except Exception, e:
            mmda_logger('pylast','ERROR',e)
        else:
            # TODO: make it compatible with tags imported from mb (TODO2: add tags from MusicBrainz)

            # TODO: remove random?
            import random
            random.shuffle(lastfm_tags())

            if lastfm_abstract:
                artist.abstract = {'content':strip_tags(lastfm_abstract), 'lang':'en', 'provider':'Last.fm', 'url':lastfm_url()}

            artist.tags                     = [(t.item.name.lower(), int( float(t.weight)/(float(100)/float(4)) ) ) for t in lastfm_tags()]
            artist.similar                  = lastfm_similar()
            artist.urls['Last.fm']          = [lastfm_url()]

            # TODO: optimize
            if lastfm_images():
                artist_pictures = CachedArtistPictures.get_or_create(artist._id)
                if 'lastfm' not in artist_pictures:
                    artist_pictures.artist_name = artist.name
                    artist_pictures.lastfm = [ {'sq':i.sizes.largesquare, 'big':i.sizes.original, 'url':i.url,'title':i.title} for i in lastfm_images()]
                    artist_pictures.cache_state['lastfm'] = [1,datetime.utcnow()]
                    artist_pictures.save()
                    mmda_logger('db','store',artist_pictures)

        # if fail, store state too -- to avoid future attempts
        artist.cache_state['lastfm']    = [1,datetime.utcnow()]
        artist.changes_present = True
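
For reference, the tag-weight expression above maps last.fm weights (0-100) onto 0-4 buckets, presumably for tag-cloud sizing:

    >>> [int(float(w)/(float(100)/float(4))) for w in (100, 75, 40, 5)]
    [4, 3, 1, 0]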
Example #18
def _populate_deep_release_mb(release_group,release_mbid):
    """
    Make sure ReleaseGroup contains additional, detailed information about specified release.

    @param release_group: a CachedReleaseGroup object
    @param release_mbid:  a string containing a MusicBrainz ID of a release

    @return: a verified/updated CachedReleaseGroup object
    """
    release = release_group.releases[release_mbid]
    if release['cache_state']['mb'][0] == 1:
        # TODO: remove unused includes
        try:
            t = mmda_logger('mb','request','release',release_mbid)
            mb_release  = mb_query.getReleaseById(release_mbid, MB_RELEASE_INCLUDES)
            mmda_logger('mb','result','release',mb_release.title,t)
        except WebServiceError, e:
            # TODO: hard error here
            mmda_logger('mb-release','ERROR',e)
            raise e
        else:
            # make sure mbid of an artist is present
            if 'artist_mbid' not in release_group:
                release_group.artist_mbid = extractUuid(mb_release.artist.id)

            # TRACK LISTING
            # TODO: think about duration representation here
            tracks = []
            for mb_track in mb_release.tracks:
                track = {'title':mb_track.title, 'mbid':extractUuid(mb_track.id)}
                if mb_track.duration:
                    track['duration'] = humanize_duration(mb_track.duration)
                tracks.append(track)
            release['tracks'] = tracks

            # URL relations
            urls = {}
            for relation in mb_release.getRelations(Relation.TO_URL):
                relation_type = decruft_mb(relation.type)
                if relation_type not in urls:
                    urls[relation_type] = []
                urls[relation_type].append(relation.targetId)
            # urls is used in many places, so it's handy to have it ready
            release['urls'] = urls

            # CREDIT relations
            credits = [{'type':decruft_mb(r.type), 'mbid':extractUuid(r.targetId), 'name':r.target.name} for r in mb_release.getRelations(Relation.TO_ARTIST)]
            if credits:
                release['credits'] = credits

            # MULTI-DISC-RELEASE information
            remasters = []
            for relation in mb_release.getRelations(Relation.TO_RELEASE):
                relation_type = decruft_mb(relation.type)
                linked_release = {'mbid':extractUuid(relation.targetId), 'title':relation.target.title}

                if relation_type == 'PartOfSet':
                    if relation.direction == 'backward':
                        release['set_prev'] = linked_release
                    else:
                        release['set_next'] = linked_release

                elif relation_type == 'Remaster':
                    if relation.direction == 'backward':
                        remasters.append(linked_release)
                    else:
                        release['remaster_of'] = linked_release
            if remasters:
                release['remasters'] = remasters

            release['cache_state']['mb'] = [2,datetime.utcnow()]
            release_group = _perform_cover_lookup_on_mb_data(release_group, release_mbid)
            release_group.changes_present = True
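
A hypothetical shape of release['urls'] after the URL-relations loop above (the keys depend on which relations MusicBrainz returns; 'Wikipedia' is the one other examples rely on, 'Discogs' is a made-up illustration):

    # release['urls'] == {
    #     'Wikipedia': ['http://en.wikipedia.org/wiki/Some_Album'],
    #     'Discogs':   ['http://www.discogs.com/release/12345'],
    # }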
Example #19
def _create_shallow_releases_mb(mb_artist):
    """
    Create CachedReleaseGroup documents using basic MusicBrainz data fetched with artist.

    @param mb_artist: a musicbrainz2.model.Artist object
    """
    mb_releases = mb_artist.getReleases()
    artist_mbid = extractUuid(mb_artist.id)

    # magical place where all data is cached/processed before database commit
    there_will_be_dragons = {}

    for mb_release in mb_releases:
        group_mbid      = extractUuid(mb_release.releaseGroup.id)
        release_mbid    = extractUuid(mb_release.id)

        # it's ugly, but we fill this only once (place for future improvements)
        if group_mbid not in there_will_be_dragons:
            release_group                       = {}
            release_group['_id']                = group_mbid
            release_group['artist_mbid']        = artist_mbid
            release_group['artist_name']        = mb_artist.name
            release_group['title']              = mb_release.releaseGroup.title
            # small fix: in some rare cases, a ReleaseGroup at MusicBrainz has no 'type' property
            release_group['release_type']       = decruft_mb(mb_release.releaseGroup.type) if mb_release.releaseGroup.type else 'Other'
            release_group['releases']           = {}
            there_will_be_dragons[group_mbid]   = release_group
        else:
            release_group = there_will_be_dragons[group_mbid]

        # store only basic information about release event
        mb_release_events = []
        for mb_event in mb_release.getReleaseEvents():
            event = {}
            if mb_event.date:
                event['date']    = mb_event.date
            if mb_event.format:
                event['format']  = decruft_mb(mb_event.format)
            if mb_event.country:
                event['country'] = mb_event.country
            if event:
                mb_release_events.append(event)

        release_group['releases'][release_mbid] = {
                'title':mb_release.title,
                'tracks_count':mb_release.tracksCount,
                'release_events':mb_release_events,
                'cache_state':{'mb':[1,datetime.utcnow()]}
                }

        # primary release is the one with the earliest release date (place for future improvements)
        mb_earliest_release_date = mb_release.getEarliestReleaseEvent().getDate() if mb_release.getEarliestReleaseEvent() else None
        # guard against dateless releases: in Python 2, None sorts below any string
        if 'primary' not in release_group or release_group['primary'][1] is None or \
           (mb_earliest_release_date is not None and mb_earliest_release_date < release_group['primary'][1]):
            release_group['primary'] = [release_mbid, mb_earliest_release_date]

    # just to make sure no old data is left..
    old_cached_release_groups = get_db('artists').view('artists/release_groups', key=artist_mbid)
    for group in old_cached_release_groups:
        del get_db('artists')[group['id']]

    for release_group in there_will_be_dragons.itervalues():
        cached_release_group = CachedReleaseGroup.wrap(release_group) # TODO: think if wrap is the best way of dealing with this
        cached_release_group.cache_state['mb'] = [1,datetime.utcnow()]
        cached_release_group.save() # TODO: add try in case of ResourceConflict? 
        mmda_logger('db','store', cached_release_group)
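
The primary-release comparison above relies on MusicBrainz dates being ISO-formatted strings, so plain string comparison orders them chronologically (granularity differences aside):

    >>> '1997-05-21' < '1997-06'
    True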