Exemplo n.º 1
0
    def getMedia(self, artist):
        """Bundle the artist's singles and albums into one media container.

        ``artist`` is accepted for interface compatibility but is not read
        here; the data comes from ``self.getMediaSingles()`` and
        ``self.getMediaAlbums()``.

        Returns:
            An ``artistDBMediaClass`` whose ``media`` dict holds the
            "Singles" and "Albums" entries.
        """
        container = artistDBMediaClass()

        # Singles are collected first, then albums.
        singles = self.getMediaSingles()
        container.media["Singles"] = singles
        albums = self.getMediaAlbums()
        container.media["Albums"] = albums

        return container
Exemplo n.º 2
0
    def getMedia(self):
        """Parse every discography table on the page into media entries.

        ``<h3>`` headings are paired positionally with ``<table>`` elements;
        the heading text is used as the media type. Every row after the first
        one of a table is read through its "Year", "Title" and "Artist"
        columns.

        Returns:
            An ``artistDBMediaClass`` whose ``media`` dict maps each media
            type to a list of ``artistDBMediaDataClass`` entries.

        Raises:
            ValueError: if a title cell contains no link, or if a table with
                data rows lacks one of the required column headers.
        """
        amc = artistDBMediaClass()

        mediaTypes = [x.text for x in self.bsdata.findAll("h3")]
        tables = dict(zip(mediaTypes, self.bsdata.findAll("table")))

        for mediaType, table in tables.items():
            rows = table.findAll('tr')[1:]
            if not rows:
                # Nothing to parse; do not demand headers from an empty table.
                continue

            # Column positions are identical for every row of the table, so
            # resolve them once instead of once per row.
            headers = [x.text for x in table.findAll("th")]
            yearIdx = headers.index("Year")
            titleIdx = headers.index("Title")
            artistIdx = headers.index("Artist")

            for tr in rows:
                tds = tr.findAll("td")

                ## Year
                year = tds[yearIdx].text

                ## Title
                titleCell = tds[titleIdx]
                refs = [x.attrs['href'] for x in titleCell.findAll('a')]
                if len(refs) == 0:
                    raise ValueError("No link for album")
                albumURL = refs[0]
                album = titleCell.text

                ## Code: md5 of the last path component of the album link
                ## with its dashes removed, read as a base-16 integer.
                uuid = albumURL.split("/")[-1]
                digest = md5(uuid.replace("-", "").encode('utf-8'))
                code = int(digest.hexdigest(), 16)

                ## Artist
                # Kept in separate names so the album link above is not
                # overwritten by the last artist link.
                artists = [{
                    "name": artistRef.text,
                    "url": artistRef.attrs['href']
                } for artistRef in tds[artistIdx].findAll('a')]

                amdc = artistDBMediaDataClass(album=album,
                                              url=albumURL,
                                              aclass=None,
                                              aformat=None,
                                              artist=artists,
                                              code=code,
                                              year=year)
                if amc.media.get(mediaType) is None:
                    amc.media[mediaType] = []
                amc.media[mediaType].append(amdc)

        return amc
Exemplo n.º 3
0
    def getMedia(self):
        """Parse the first table on the page into "Singles" entries.

        Header cell texts are zipped with each row's ``<td>`` cells, so the
        columns are addressed by name: "Track" (required; rows without it are
        skipped), "Peak Date" (optional; its first four characters become the
        year) and "With" (optional; each link in it is a collaborating
        artist).

        Returns:
            An ``artistDBMediaClass`` whose ``media["Singles"]`` list holds
            one ``artistDBMediaDataClass`` per parsed row (empty when the
            page has no table).
        """
        amc = artistDBMediaClass()
        mediaType = "Singles"
        amc.media[mediaType] = []

        table = self.bsdata.find("table")
        if table is None:
            return amc

        ths = [th.text for th in table.findAll("th")]
        for tr in table.findAll("tr")[1:]:
            trackData = dict(zip(ths, tr.findAll("td")))

            trackCell = trackData.get("Track")
            if trackCell is None:
                continue

            peakCell = trackData.get("Peak Date")
            trackYear = peakCell.text[:4] if peakCell is not None else None

            trackRef = trackCell.find("a")
            trackURL = trackRef.attrs['href'] if trackRef is not None else None
            trackName = trackCell.text

            # Each <a> in the "With" cell is itself the artist link: read the
            # name and href from that anchor rather than from the Track cell
            # or from a nested <a>. The column is optional.
            trackArtists = []
            withCell = trackData.get("With")
            if withCell is not None:
                for artistRef in withCell.findAll("a"):
                    trackArtists.append({
                        "Artist": artistRef.text,
                        "URL": artistRef.attrs.get('href')
                    })

            code = self.dbUtils.getAlbumCode(name=trackName, url=trackURL)

            amdc = artistDBMediaDataClass(album=trackName,
                                          url=trackURL,
                                          aclass=None,
                                          aformat=None,
                                          artist=trackArtists,
                                          code=code,
                                          year=trackYear)
            if amc.media.get(mediaType) is None:
                amc.media[mediaType] = []
            amc.media[mediaType].append(amdc)

        return amc
Exemplo n.º 4
0
    def getMedia(self, artist):
        """Turn every ``div.albumBlock`` on the page into a media entry.

        Inside each block the child ``<div>`` elements are indexed by their
        first CSS class. A child that contains a link is wrapped in
        ``artistDBLinkClass``, any other child in ``artistDBTextClass``.
        The "image" child supplies the URL; "albumTitle", "date" and "type"
        supply the title, year and media type.

        ``artist`` is not read; the entry's artist is the fixed string "U2".

        Returns:
            An ``artistDBMediaClass`` whose ``media`` dict maps media type to
            a list of ``artistDBMediaDataClass`` entries.
        """
        result = artistDBMediaClass()

        for block in self.bsdata.findAll("div", {"class": "albumBlock"}):
            fields = {}
            for child in block.findAll("div"):
                classes = child.attrs.get("class")
                if isinstance(classes, list):
                    label = classes[0]
                else:
                    label = None
                anchor = child.find("a")
                if anchor is None:
                    fields[label] = artistDBTextClass(child)
                else:
                    fields[label] = artistDBLinkClass(anchor)

            def textOf(fieldName):
                # Text of a field, but only when it was stored as plain text.
                field = fields.get(fieldName)
                if isinstance(field, artistDBTextClass):
                    return field.text
                return None

            imageField = fields.get("image")
            link = None
            if isinstance(imageField, artistDBLinkClass):
                link = imageField.href

            name = textOf("albumTitle")
            released = textOf("date")
            kind = textOf("type")

            albumCode = self.dutils.getAlbumCode(name=name, url=link)
            entry = artistDBMediaDataClass(album=name,
                                           url=link,
                                           aclass=None,
                                           aformat=None,
                                           artist="U2",
                                           code=albumCode,
                                           year=released)

            if result.media.get(kind) is None:
                result.media[kind] = []
            result.media[kind].append(entry)
            if self.debug:
                print("\t\tAdding Media ({0} -- {1})".format(name, link))

        return result
Exemplo n.º 5
0
    def getMedia(self, artist):
        """Collect albums and singles from every ``div.wrap`` table cell.

        A cell whose text starts with "Album: " is filed under "Album" with
        that prefix stripped; any other cell is filed under "Single". The
        media type is also what gets passed as ``url`` to ``getAlbumCode``.
        An (initially empty) "Albums" list is always present in the result.

        Args:
            artist: stored unchanged as the ``artist`` of every entry.

        Returns:
            An ``artistDBMediaClass`` with the collected entries.
        """
        collected = artistDBMediaClass()
        collected.media["Albums"] = []

        albumPrefix = "Album: "
        for table in self.bsdata.findAll("table"):
            for row in table.findAll("tr"):
                for cell in row.findAll("td"):
                    wrap = cell.find("div", {"class": "wrap"})
                    if wrap is None:
                        continue

                    label = wrap.text
                    isAlbum = label.startswith(albumPrefix)
                    kind = "Album" if isAlbum else "Single"
                    title = label[len(albumPrefix):] if isAlbum else label

                    if collected.media.get(kind) is None:
                        collected.media[kind] = []

                    albumCode = self.dbUtils.getAlbumCode(name=title,
                                                          url=kind)
                    entry = artistDBMediaDataClass(album=title,
                                                   url=None,
                                                   aclass=None,
                                                   aformat=None,
                                                   artist=artist,
                                                   code=albumCode,
                                                   year=None)
                    collected.media[kind].append(entry)

        return collected
Exemplo n.º 6
0
    def getMedia(self, artist):
        """Extract media from the page's embedded "initial-state" JSON.

        The first ``<script data-name="initial-state">`` element is decoded
        as JSON and handed to ``self.getArtistMediaData``; the entries it
        returns are appended per media type. ``artist`` is not read here.

        Returns:
            An ``artistDBMediaClass``. It always has an "Albums" list, which
            stays empty when the script tag is missing or its content cannot
            be decoded (a message is printed in both cases).
        """
        amc = artistDBMediaClass()
        amc.media["Albums"] = []

        script = self.bsdata.find("script", {"data-name": "initial-state"})
        if script is None:
            print("No media data")
            return amc

        try:
            jdata = json.loads(script.contents[0])
        except (IndexError, TypeError, ValueError):
            # IndexError: the <script> element is empty.
            # TypeError / ValueError (incl. JSONDecodeError): the content is
            # not decodable JSON text. Anything else (e.g. KeyboardInterrupt)
            # is no longer swallowed.
            print("Could not load JSON data")
            return amc

        mediaData = self.getArtistMediaData(jdata)
        for mediaType, mediaTypeData in mediaData.items():
            if amc.media.get(mediaType) is None:
                amc.media[mediaType] = []
            amc.media[mediaType].extend(mediaTypeData)

        return amc
Exemplo n.º 7
0
    def getMedia(self, artist, url):
        """Parse the page's discography section(s) into media entries.

        Within each ``div#discography`` the category headings
        (``h3.disco_header_label``) are paired positionally with the last
        character of the ``<span>`` id found in each ``div.disco_showing``;
        that character selects the ``div#disco_type_<suffix>`` container whose
        ``div.disco_release`` children are the releases of the category.
        The results of ``self.getClassicalMedia`` and ``self.getCreditsMedia``
        are merged in afterwards.

        Args:
            artist: object whose ``name`` is used when a release lists no
                artists of its own.
            url: object whose ``url`` string (with the
                "https://rateyourmusic.com" prefix removed) is used for that
                same fallback artist.

        Returns:
            An ``artistDBMediaClass`` whose ``media`` dict maps media type to
            a list of ``artistDBMediaDataClass`` entries.
        """
        amc = artistDBMediaClass()

        mediadatas = self.bsdata.findAll("div", {"id": "discography"})
        for mediadata in mediadatas:
            h3s = mediadata.findAll("h3", {"class": "disco_header_label"})
            categories = [x.text for x in h3s]

            sufs = mediadata.findAll("div", {"class": "disco_showing"})
            spans = [x.find("span") for x in sufs]
            ids = [x.attrs['id'] for x in spans]
            letters = [x[-1] for x in ids]

            for mediaType, suffix in dict(zip(categories, letters)).items():
                categorydata = mediadata.find(
                    "div", {"id": "disco_type_{0}".format(suffix)})
                albumdatas = categorydata.findAll("div",
                                                  {"class": "disco_release"})
                for albumdata in albumdatas:

                    ## Code: the tail of the element id, kept (as a string)
                    ## only when it is numeric.
                    code = albumdata.attrs['id'].split("_")[-1]
                    try:
                        int(code)
                    except (TypeError, ValueError):
                        code = None

                    ## Title and album link come from the first name/URL
                    ## pair of the main line, when there is one.
                    mainline = albumdata.find("div",
                                              {"class": "disco_mainline"})
                    maindata = self.getNamesAndURLs(mainline)
                    try:
                        first = maindata[0]
                    except (IndexError, KeyError, TypeError):
                        first = None
                    album = getattr(first, "name", None)
                    albumurl = getattr(first, "url", None)

                    ## Year
                    yeardata = albumdata.find("span",
                                              {"class": "disco_year_y"})
                    if yeardata is None:
                        yeardata = albumdata.find("span",
                                                  {"class": "disco_year_ymd"})
                    year = yeardata.text if yeardata is not None else None

                    ## Artists: taken from the last <span>; fall back to the
                    ## page's own artist when that span lists nobody.
                    artistdata = albumdata.findAll("span")[-1]
                    albumartists = self.getNamesAndURLs(artistdata)
                    if len(albumartists) == 0:
                        albumartists = [
                            artistDBURLInfo(name=artist.name,
                                            url=url.url.replace(
                                                "https://rateyourmusic.com",
                                                ""),
                                            ID=None)
                        ]

                    # url is the album link, not the album title.
                    amdc = artistDBMediaDataClass(album=album,
                                                  url=albumurl,
                                                  aclass=None,
                                                  aformat=None,
                                                  artist=albumartists,
                                                  code=code,
                                                  year=year)
                    if amc.media.get(mediaType) is None:
                        amc.media[mediaType] = []
                    amc.media[mediaType].append(amdc)

        classicalMedia = self.getClassicalMedia(artist, url)
        if len(classicalMedia) > 0:
            amc.media.update(classicalMedia)
        creditsMedia = self.getCreditsMedia(artist, url)
        if len(creditsMedia) > 0:
            amc.media.update(creditsMedia)

        return amc
Exemplo n.º 8
0
    def getData(self, inputdata):
        """Assemble an ``artistDBDataClass`` record from Discogs API data.

        ``self.getDataBase(inputdata)`` and ``self.checkData()`` run first.
        If ``self.dbdata`` is set afterwards it is returned unchanged;
        otherwise ``self.bsdata`` must be a dict with "Artist" and "Albums"
        entries. "Albums" may be a list of per-release mappings (grouped here
        by ``self.getMediaType``) or a dict, which is used as the media
        mapping as-is.

        Raises:
            ValueError: if ``self.bsdata`` is not a dict, or "Albums" is
                neither a list nor a dict.
        """
        self.getDataBase(inputdata)
        self.checkData()

        if self.dbdata is not None:
            return self.dbdata
        if not isinstance(self.bsdata, dict):
            raise ValueError("Could not parse Discogs API data")

        artistRecord = self.bsdata["Artist"]
        albumRecords = self.bsdata["Albums"]

        # NOTE(review): `.name` next to `["name"]` suggests a labelled record
        # (e.g. a pandas Series) whose label is the artist ID; confirm.
        discogsID = artistRecord.name
        displayName = artistRecord["name"]
        artistLink = "https://www.discogs.com/artist/{0}".format(discogsID)

        # Profile fields: drop the ones that are None, and use None instead
        # of an empty dict.
        profileFields = {
            "RealName": artistRecord["realname"],
            "Aliases": artistRecord["MasterAliases"],
            "Groups": artistRecord["MasterGroups"],
            "Members": artistRecord["MasterMembers"],
            "Variations": artistRecord["MasterNameVariations"],
        }
        profileFields = {
            key: val
            for key, val in profileFields.items() if val is not None
        }
        if len(profileFields) == 0:
            profileFields = None

        # Releases
        if isinstance(albumRecords, list):
            releases = {}
            for item in albumRecords:
                fields = dict(album=item.get('name'),
                              url=item.get('url'),
                              artist=item.get('artist'),
                              code=item.get('id'),
                              aformat=item.get('format'),
                              aclass={
                                  "Label": item.get('label'),
                                  "Main": item.get('main_release')
                              },
                              year=item.get('year'))
                bucket = self.getMediaType(item)
                entry = artistDBMediaDataClass(**fields)
                if releases.get(bucket) is None:
                    releases[bucket] = []
                releases[bucket].append(entry)
        elif isinstance(albumRecords, dict):
            releases = albumRecords
        else:
            raise ValueError(
                "Not sure how to process albums [{0}]".format(albumRecords))

        nameObj = artistDBNameClass(name=displayName, err=None)
        metaObj = artistDBMetaClass(title=None, url=artistLink)
        urlObj = artistDBURLClass(url=artistLink)
        idObj = artistDBIDClass(ID=discogsID)
        pagesObj = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
        profileObj = artistDBProfileClass(general=profileFields)
        mediaObj = artistDBMediaClass()
        mediaObj.media = releases
        counts = self.getMediaCounts(mediaObj)
        infoObj = artistDBFileInfoClass(info=None)

        return artistDBDataClass(artist=nameObj,
                                 meta=metaObj,
                                 url=urlObj,
                                 ID=idObj,
                                 pages=pagesObj,
                                 profile=profileObj,
                                 mediaCounts=counts,
                                 media=mediaObj,
                                 info=infoObj)
Exemplo n.º 9
0
    def getData(self, inputdata):
        """Assemble an ``artistDBDataClass`` record from Spotify API data.

        ``self.getDataBase(inputdata)`` and ``self.checkData()`` run first.
        If ``self.dbdata`` is set afterwards it is returned unchanged;
        otherwise ``self.bsdata`` must be a dict with "Artist" and "Albums"
        entries. Albums are grouped under "<album_group> + <album_type>",
        or whichever of the two is available, or "Unknown".

        Raises:
            ValueError: if ``self.bsdata`` is not a dict, or the albums
                payload carries an ``artistID`` different from the artist's.
        """
        self.getDataBase(inputdata)
        self.checkData()

        if self.dbdata is not None:
            return self.dbdata
        if not isinstance(self.bsdata, dict):
            raise ValueError("Could not parse Spotify API data")

        artistData = self.bsdata['Artist']
        # NOTE(review): `.name` next to `.get(...)` suggests a labelled
        # record (e.g. a pandas Series) whose label is the ID; confirm.
        artistID = artistData.name
        artistURI = artistData.get('uri')
        artistType = artistData.get('stype')
        artistPopularity = artistData.get('popularity')
        artistName = artistData.get('name')
        artistAPIURL = artistData.get('href')
        artistGenres = artistData.get('genres', [])
        artistFollowers = artistData.get('followers')
        artistURL = artistData.get('urls', {}).get('spotify')

        generalData = {"Type": artistType}
        genresData = artistGenres if len(artistGenres) > 0 else None
        externalData = {'SpotifyAPI': {"URL": artistAPIURL, "URI": artistURI}}
        extraData = {
            'Followers': artistFollowers,
            "Popularity": artistPopularity
        }

        mediaData = {}
        albumsData = self.bsdata['Albums']
        if len(albumsData) > 0:
            if albumsData.get('artistID') != artistID:
                raise ValueError(
                    "ArtistIDs do not match for Spotify API Data! [{0}, {1}]".
                    format(albumsData.get('artistID'), artistID))

            for albumData in albumsData.get('albums', []):
                albumID = albumData.get('sid')
                albumGroup = albumData.get('album_group')
                albumType = albumData.get('album_type')
                albumSType = albumData.get('stype')
                albumArtists = [{
                    artist['sid']: artist['name']
                } for artist in albumData.get('artists', [])]
                albumURL = albumData.get('urls', {}).get('spotify')
                albumURI = albumData.get('uri')
                albumAPI = albumData.get('href')
                albumName = albumData.get('name')
                albumTracks = albumData.get('numtracks')
                albumDate = albumData.get('date')

                # The year is best-effort: any ordinary parsing failure
                # means "unknown year". `except Exception` (not a bare
                # except) lets KeyboardInterrupt/SystemExit propagate.
                albumYear = None
                if albumDate is not None:
                    try:
                        albumYear = to_datetime(albumDate).year
                    except Exception:
                        albumYear = None

                if albumGroup and albumType:
                    mediaName = " + ".join([albumGroup, albumType])
                elif albumGroup is not None:
                    mediaName = albumGroup
                elif albumType is not None:
                    mediaName = albumType
                else:
                    mediaName = "Unknown"

                amdc = artistDBMediaDataClass(album=albumName,
                                              url=albumURL,
                                              artist=albumArtists,
                                              code=albumID,
                                              year=albumYear,
                                              aclass=albumSType,
                                              aformat={
                                                  "URI": albumURI,
                                                  "API": albumAPI,
                                                  "Date": albumDate,
                                                  "NumTracks": albumTracks
                                              })
                if mediaData.get(mediaName) is None:
                    mediaData[mediaName] = []
                mediaData[mediaName].append(amdc)

        artist = artistDBNameClass(name=artistName, err=None)
        meta = artistDBMetaClass(title=None, url=artistURL)
        url = artistDBURLClass(url=artistURL)
        ID = artistDBIDClass(ID=artistID)
        pages = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
        profile = artistDBProfileClass(general=generalData,
                                       external=externalData,
                                       extra=extraData,
                                       genres=genresData)
        media = artistDBMediaClass()
        media.media = mediaData
        mediaCounts = self.getMediaCounts(media)
        info = self.getInfo()

        adc = artistDBDataClass(artist=artist,
                                meta=meta,
                                url=url,
                                ID=ID,
                                pages=pages,
                                profile=profile,
                                mediaCounts=mediaCounts,
                                media=media,
                                info=info)

        return adc
Exemplo n.º 10
0
    def getMedia(self, artist):
        """Extract albums and songs from JSON embedded in a ``<meta>`` tag.

        The first ``<meta>`` whose ``content`` attribute looks like a JSON
        object (starts with "{" and ends with "}") and decodes successfully
        is used. Its ``artist_albums`` items become "Albums" entries and its
        ``artist_songs`` items become "Singles" entries. ``artist`` is not
        read; the artist name comes from the JSON (``None`` if absent).

        Returns:
            An ``artistDBMediaClass``. It always has an "Albums" list;
            "Singles" is only added when the JSON has ``artist_songs``.
        """
        amc = artistDBMediaClass()
        amc.media["Albums"] = []

        jdata = None
        for meta in self.bsdata.findAll("meta"):
            # Many <meta> tags (e.g. charset) carry no content attribute;
            # skip those instead of raising KeyError.
            content = meta.attrs.get('content')
            if content is None:
                continue
            if content.startswith("{") and content.endswith("}"):
                try:
                    jdata = json.loads(content)
                except ValueError:
                    # Looked like JSON but is not; try the next tag.
                    continue
                break

        if jdata is None:
            return amc

        try:
            artistName = jdata['artist']['name']
        except (KeyError, TypeError):
            artistName = None

        mediaType = "Albums"
        if jdata.get('artist_albums') is not None:
            for albumData in jdata['artist_albums']:
                albumName = albumData['name']
                albumID = albumData['id']
                try:
                    albumYear = albumData['release_date_components']['year']
                except (KeyError, TypeError):
                    # Components are missing or not a mapping (e.g. None).
                    albumYear = None

                if amc.media.get(mediaType) is None:
                    amc.media[mediaType] = []
                amdc = artistDBMediaDataClass(album=albumName,
                                              url=None,
                                              aclass=None,
                                              aformat=None,
                                              artist=[artistName],
                                              code=albumID,
                                              year=albumYear)
                amc.media[mediaType].append(amdc)

        mediaType = "Singles"
        if jdata.get('artist_songs') is not None:
            for songData in jdata['artist_songs']:
                songName = songData['title']
                songID = songData['id']

                if amc.media.get(mediaType) is None:
                    amc.media[mediaType] = []
                amdc = artistDBMediaDataClass(album=songName,
                                              url=None,
                                              aclass=None,
                                              aformat=None,
                                              artist=[artistName],
                                              code=songID,
                                              year=None)
                amc.media[mediaType].append(amdc)

        return amc
Exemplo n.º 11
0
    def getMedia(self, artist):
        """Parse the "artist-albums-section" release lists into "Albums".

        Every ``li.resource-list--release-list-item-wrap`` inside an
        ``ol.buffer-standard`` of the section yields one entry: name and URL
        come from the first link of its ``<h3>`` title, the code from
        ``self.dutils.getArtistID(album)``, and the year (best effort) from
        the second auxiliary-text paragraph. Items without a title heading
        or without a link are skipped.

        Args:
            artist: object whose ``name`` is stored as the only artist of
                every entry.

        Returns:
            An ``artistDBMediaClass`` whose ``media["Albums"]`` list holds the
            entries (empty when the section is missing).
        """
        amc = artistDBMediaClass()
        mediaType = "Albums"
        amc.media[mediaType] = []

        albumsection = self.bsdata.find("section",
                                        {"id": "artist-albums-section"})
        if albumsection is None:
            if self.debug:
                print("\t\tNo Album Section!")
            return amc

        ols = albumsection.findAll("ol", {"class": "buffer-standard"})
        if self.debug:
            print("\t\tFound {0} Resource Lists".format(len(ols)))
        for ol in ols:
            lis = ol.findAll(
                "li", {"class": "resource-list--release-list-item-wrap"})
            for il, li in enumerate(lis):
                h3 = li.find(
                    "h3", {"class": "resource-list--release-list-item-name"})
                if h3 is None:
                    if self.debug:
                        print(
                            "\t\tNo <h3> in artist list section ({0}/{1}): {2}"
                            .format(il, len(lis), li))
                    continue
                linkdata = self.getNamesAndURLs(h3)
                if len(linkdata) == 0:
                    continue

                ## Name
                album = linkdata[0].name

                ## URL
                url = linkdata[0].url

                ## Code
                code = self.dutils.getArtistID(album)

                ## Year
                # The filter must be a dict ({"class": ...}); the set literal
                # used before also matched elements whose class is "class".
                year = None
                codedatas = li.findAll(
                    "p",
                    {"class": "resource-list--release-list-item-aux-text"})
                if len(codedatas) == 2:
                    codedata = codedatas[1].text
                    vals = [x.strip() for x in codedata.split("\n")]
                    if len(vals) == 5:
                        try:
                            # Third line, minus its last two characters;
                            # the final whitespace-separated token is the
                            # year.
                            year = int(vals[2][:-2].split()[-1])
                        except (IndexError, ValueError):
                            year = None

                amdc = artistDBMediaDataClass(album=album,
                                              url=url,
                                              aclass=None,
                                              aformat=None,
                                              artist=[artist.name],
                                              code=code,
                                              year=year)
                if amc.media.get(mediaType) is None:
                    amc.media[mediaType] = []
                amc.media[mediaType].append(amdc)
                if self.debug:
                    print("\t\tAdding Media ({0} -- {1})".format(album, url))

        return amc
Exemplo n.º 12
0
    def getMedia(self):
        """Parse every table on the page into a list of "Videos" entries.

        For each row after the first, the first ``<td>`` holds
        "<artist> - <title>" text and, normally, a link. The title is cleaned
        of a fixed set of video-type tags and of repeated spaces; rows whose
        cleaned title is empty are skipped.

        Returns:
            An ``artistDBMediaClass`` whose ``media["Videos"]`` list holds one
            ``artistDBMediaDataClass`` per kept row.
        """
        amc = artistDBMediaClass()
        mediaType = "Videos"
        amc.media[mediaType] = []

        # Tags stripped from titles, applied in this order (the longer
        # "(Official Video (Short Version))" must precede "(Official Video)").
        removes = (
            "(Official Music Video)", "(Official Lyric Video)",
            "(Official Video (Short Version))", "(Official Video)",
            "[Lyric Video]", "(Video Version)",
            "[Official Music Video]", "(Official Audio)",
            "(Shazam Version)", "(Explicit)", "(Dance Video)",
            "(Lyric Video)", "[Official Video]",
            "(Official Dance Video)", '(Acoustic)', '(Audio)',
            '(Visualizer)', '(Video Commentary)', '(VEVO Footnotes)',
            '(Choir Version)', '(Fan Lip Sync Version)', '(Trailer)',
            '(Teaser)',
        )

        for table in self.bsdata.findAll("table"):
            for tr in table.findAll("tr")[1:]:
                td = tr.find('td')
                if td is None:
                    # Row without data cells (e.g. an extra header row).
                    continue
                ref = td.find("a")
                name = td.text

                # Example: https://kworb.net/youtube/video/fRh_vgS2dFE.html
                # Without a link there is nothing to derive the URL from, so
                # leave it None rather than passing None to getBaseFilename.
                # NOTE(review): assumes getAlbumCode accepts url=None; confirm.
                trackURL = None
                if ref is not None:
                    trackURL = "https://kworb.net/youtube/video/{0}.html".format(
                        getBaseFilename(ref.attrs['href']))

                songData = name.split(' - ')
                artistName = songData[0]
                trackName = " - ".join(songData[1:])

                for rmText in removes:
                    trackName = trackName.replace(rmText, "").strip()
                # Collapse runs of spaces left behind by the removals.
                while "  " in trackName:
                    trackName = trackName.replace("  ", " ")

                if len(trackName.strip()) == 0:
                    continue

                code = self.dbUtils.getAlbumCode(name=trackName, url=trackURL)

                amdc = artistDBMediaDataClass(album=trackName,
                                              url=trackURL,
                                              aclass=None,
                                              aformat=None,
                                              artist=artistName,
                                              code=code,
                                              year=None)
                if amc.media.get(mediaType) is None:
                    amc.media[mediaType] = []
                amc.media[mediaType].append(amdc)

        return amc
Exemplo n.º 13
0
    def getData(self, inputdata):
        """Assemble an ``artistDBDataClass`` record from Deezer API data.

        ``self.getDataBase(inputdata)`` and ``self.checkData()`` run first.
        If ``self.dbdata`` is set afterwards it is returned unchanged;
        otherwise ``self.bsdata`` must be a dict with "Tracks", "Albums",
        "Name", "ID", "URL" and "Type" entries, where "Tracks" and "Albums"
        map a code to a mapping with "Name" and "URL".

        Both tracks and albums end up in the result's media (under "Tracks"
        and "Albums").

        Raises:
            ValueError: if ``self.bsdata`` is not a dict.
        """
        self.getDataBase(inputdata)
        self.checkData()

        if self.dbdata is not None:
            return self.dbdata
        if not isinstance(self.bsdata, dict):
            raise ValueError("Could not parse Deezer API data")

        artistData = self.bsdata

        artistTracks = artistData["Tracks"]
        artistAlbums = artistData["Albums"]
        artistName = artistData["Name"]
        artistID = artistData["ID"]
        artistURL = artistData["URL"]
        generalData = {"Type": artistData["Type"]}

        # One dict for both groups. It must not be re-created between the two
        # loops, otherwise the tracks are thrown away.
        mediaData = {}
        for mediaName, items in (("Tracks", artistTracks),
                                 ("Albums", artistAlbums)):
            mediaData[mediaName] = []
            for code, item in items.items():
                amdc = artistDBMediaDataClass(album=item["Name"],
                                              url=item["URL"],
                                              artist=[artistName],
                                              code=code,
                                              year=None)
                mediaData[mediaName].append(amdc)

        artist = artistDBNameClass(name=artistName, err=None)
        meta = artistDBMetaClass(title=None, url=artistURL)
        url = artistDBURLClass(url=artistURL)
        ID = artistDBIDClass(ID=artistID)
        pages = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
        profile = artistDBProfileClass(general=generalData)
        media = artistDBMediaClass()
        media.media = mediaData
        mediaCounts = self.getMediaCounts(media)
        info = self.getInfo()

        adc = artistDBDataClass(artist=artist,
                                meta=meta,
                                url=url,
                                ID=ID,
                                pages=pages,
                                profile=profile,
                                mediaCounts=mediaCounts,
                                media=media,
                                info=info)

        return adc
Exemplo n.º 14
0
    def getMedia(self):
        """Parse the ``table#artist`` discography into media entries.

        Rows containing an ``<h3>`` start a new media group named after the
        heading text. Every other row with a ``td.title`` cell that yields an
        album name becomes an entry in the current group; rows seen before
        any heading go into a group called "Albums".

        Returns:
            An ``artistDBMediaClass`` whose ``media`` dict maps group name to
            a list of ``artistDBMediaDataClass`` entries. When the table is
            missing, its ``err`` attribute is set to "NoMedia".
        """
        amc = artistDBMediaClass()

        table = self.bsdata.find("table", {"id": "artist"})
        if table is None:
            amc.err = "NoMedia"
            return amc

        name = None
        for tr in table.findAll("tr"):
            h3 = tr.find("h3")
            if h3 is not None:
                # Heading row: open a new (empty) group.
                name = h3.text
                amc.media[name] = []
                continue

            # Album, URL, Format
            result = tr.find("td", {"class": "title"})
            album = None
            url = None
            albumformat = name
            if result is not None:
                retval = self.getMediaAlbum(result)
                album = fixName(retval.album)
                url = retval.url
                albumformat = retval.aformat

            if album is None:
                continue

            # Code
            code = tr.attrs.get("data-object-id")

            # AlbumClass
            albumclass = tr.attrs.get("data-object-type")

            # Artists
            result = tr.find("td", {"class": "artist"})
            artists = None
            if result is not None:
                artists = self.getNamesAndURLs(result)

            # Year
            result = tr.find("td", {"class": "year"})
            year = None
            if result is not None:
                year = result.text

            if name is None:
                # Album row before any heading: file it under "Albums".
                name = "Albums"
                amc.media[name] = []
            amdc = artistDBMediaDataClass(album=album,
                                          url=url,
                                          aclass=albumclass,
                                          aformat=albumformat,
                                          artist=artists,
                                          code=code,
                                          year=year)
            amc.media[name].append(amdc)

        return amc
Exemplo n.º 15
0
    def getMedia(self, artist):
        """Build the "Albums" and "Songs" media lists from the parsed page.

        Albums are read from the ``<article>`` elements inside
        ``<section id="album-artist">``; songs from the ``<li>`` elements inside
        ``<ol id="songs-list">``.  A missing section simply leaves the
        corresponding list empty.

        Args:
            artist: object whose ``name`` attribute is recorded as the sole
                artist of every entry.

        Returns:
            artistDBMediaClass with ``media["Albums"]`` and ``media["Songs"]``.

        Raises:
            ValueError: when an entry lacks its link, caption, bold or italic
                element.
        """
        def urlCode(link, modulus):
            # md5 over the "/"-separated pieces of the link, reduced modulo
            # `modulus` and rendered as a decimal string.
            digest = md5()
            for piece in link.split("/"):
                digest.update(piece.encode('utf-8'))
            return str(int(digest.hexdigest(), 16) % modulus)

        amc = artistDBMediaClass()

        ## Albums
        albumKey = "Albums"
        amc.media[albumKey] = []
        albumSection = self.bsdata.find("section", {"id": "album-artist"})
        if albumSection is not None:
            for article in albumSection.findAll("article"):
                link = article.find('a')
                if link is None:
                    raise ValueError("No ref in article")
                albumURL = link.attrs['href']

                figcaption = link.find("figcaption")
                if figcaption is None:
                    raise ValueError("No figcaption in article")

                bold = figcaption.find("b")
                if bold is None:
                    raise ValueError("No bold in caption")

                italic = figcaption.find("i")
                if italic is None:
                    raise ValueError("No italics in caption")

                title = bold.text
                released = italic.text
                albumCode = urlCode(albumURL, int(1e9))
                performers = [artist.name]

                entry = artistDBMediaDataClass(album=title,
                                               url=albumURL,
                                               aclass=None,
                                               aformat=None,
                                               artist=performers,
                                               code=albumCode,
                                               year=released)
                amc.media[albumKey].append(entry)

        ## Songs
        songKey = "Songs"
        amc.media[songKey] = []
        songList = self.bsdata.find("ol", {"id": "songs-list"})
        if songList is not None:
            for item in songList.findAll("li"):
                link = item.find('a')
                if link is None:
                    raise ValueError("No ref in article")
                songURL = link.attrs['href']

                bold = link.find("b")
                if bold is None:
                    raise ValueError("No bold in ref")

                title = bold.text
                # Song codes use a larger modulus than album codes.
                songCode = urlCode(songURL, int(1e10))
                performers = [artist.name]

                entry = artistDBMediaDataClass(album=title,
                                               url=songURL,
                                               aclass=None,
                                               aformat=None,
                                               artist=performers,
                                               code=songCode,
                                               year=None)
                amc.media[songKey].append(entry)

        return amc
Exemplo n.º 16
0
    def getData(self, inputdata):
        """Convert parsed track/album data into an ``artistDBDataClass``.

        After ``getDataBase``/``checkData`` have run, ``self.dbdata`` is
        returned as-is when it is already set.  Otherwise ``self.bsdata`` must
        be a dict with "Tracks" and "Albums" lists; the artist identity is
        taken from the first track (or, failing that, the first album).

        Tracks with more than 50 counts and albums with more than 25 counts
        are kept, each list trimmed to the entries whose count reaches the
        1000th-highest count (ties at the cutoff are retained).

        Args:
            inputdata: passed straight through to ``self.getDataBase``.

        Returns:
            ``self.dbdata`` when present, ``None`` when there are neither
            tracks nor albums, otherwise the assembled ``artistDBDataClass``.

        Raises:
            ValueError: if ``self.bsdata`` is not a dict.
        """
        self.getDataBase(inputdata)
        self.checkData()

        if self.dbdata is not None:
            return self.dbdata
        if not isinstance(self.bsdata, dict):
            raise ValueError("Could not parse LastFM API data")

        tracks = self.bsdata["Tracks"]
        albums = self.bsdata["Albums"]

        # The artist identity is carried redundantly on every entry; read it
        # from the first track, falling back to the first album.
        if len(tracks) > 0:
            first = tracks[0]
        elif len(albums) > 0:
            first = albums[0]
        else:
            return None

        artistName = first["artistName"]
        artistURL = first["artistURL"]
        artistID = self.dbUtils.getArtistID(artistURL)
        generalData = None
        externalData = {"MusicBrainzID": first["artistMBID"]}

        def popular(entries, minCounts, maxEntries=1000):
            # Keep entries with more than `minCounts` counts, then trim to
            # those at or above the `maxEntries`-th highest count.
            rows = []
            for entry in entries:
                counts = int(entry["counts"])
                if counts > minCounts:
                    rows.append({
                        "Name": entry["name"],
                        "URL": entry["URL"],
                        "Counts": counts
                    })
            if not rows:
                return rows
            ranked = sorted((row["Counts"] for row in rows), reverse=True)
            cutoff = ranked[min(len(ranked), maxEntries) - 1]
            return [row for row in rows if row["Counts"] >= cutoff]

        def toMedia(rows):
            # One media entry per row; the code is md5(name + URL) mod 1e7.
            entries = []
            for row in rows:
                m = md5()
                m.update(row['Name'].encode('utf-8'))
                m.update(row['URL'].encode('utf-8'))
                code = str(int(m.hexdigest(), 16) % int(1e7))
                entries.append(artistDBMediaDataClass(album=row["Name"],
                                                      url=row["URL"],
                                                      artist=[artistName],
                                                      code=code,
                                                      year=None))
            return entries

        trackData = popular(tracks, 50)
        albumData = popular(albums, 25)

        # A media type only appears in the result when it has entries.
        mediaData = {}
        if len(trackData) > 0:
            mediaData["Tracks"] = toMedia(trackData)
        if len(albumData) > 0:
            mediaData["Albums"] = toMedia(albumData)

        artist = artistDBNameClass(name=artistName, err=None)
        meta = artistDBMetaClass(title=None, url=artistURL)
        url = artistDBURLClass(url=artistURL)
        ID = artistDBIDClass(ID=artistID)
        pages = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
        profile = artistDBProfileClass(general=generalData,
                                       external=externalData)
        media = artistDBMediaClass()
        media.media = mediaData
        mediaCounts = self.getMediaCounts(media)
        info = self.getInfo()

        adc = artistDBDataClass(artist=artist,
                                meta=meta,
                                url=url,
                                ID=ID,
                                pages=pages,
                                profile=profile,
                                mediaCounts=mediaCounts,
                                media=media,
                                info=info)

        return adc
Exemplo n.º 17
0
    def getMedia(self, url):
        """Collect media entries from every ``<table>`` of the parsed page.

        The default media type is derived from ``url``: "Unknown" when it is
        ``None``, otherwise "Albums" unless the URL contains "/credits",
        "/songs" or "/compositions".  For tables whose second header is
        empty, the text of each row's second cell (when non-empty) overrides
        the default media type for that row.

        Rows lacking a "Year" or "Album" column, or an album link, are
        skipped.  Results from ``getMediaCompositions`` and ``getMediaSongs``
        are merged into the returned media afterwards.

        Args:
            url: page URL used only to choose the default media type; may be
                ``None``.

        Returns:
            artistDBMediaClass whose ``media`` maps media type -> list of
            entries.
        """
        amc = artistDBMediaClass()
        name = "Albums"
        if url is None:
            name = "Unknown"
        else:
            # Independent checks: a later match overrides an earlier one.
            if "/credits" in url:
                name = "Credits"
            if "/songs" in url:
                name = "Songs"
            if "/compositions" in url:
                name = "Compositions"
        amc.media[name] = []

        for table in self.bsdata.findAll("table"):
            trs = table.findAll("tr")
            if not trs:
                # A table without rows has no header to read.
                continue

            headers = [x.text.strip() for x in trs[0].findAll("th")]
            if len(headers) == 0:
                continue

            for tr in trs[1:]:
                tds = tr.findAll("td")

                ## Name
                try:
                    mediaType = name
                    if len(headers[1]) == 0:
                        cellText = tds[1].text.strip()
                        if len(cellText) > 0:
                            mediaType = cellText
                except IndexError:
                    # Fewer than two headers/cells: give up on this table.
                    break

                ## Year
                try:
                    year = tds[headers.index("Year")].text.strip()
                except (ValueError, IndexError):
                    continue

                ## Title
                try:
                    ref = tds[headers.index("Album")].findAll("a")
                except (ValueError, IndexError):
                    continue

                try:
                    refdata = ref[0]
                    albumURL = refdata.attrs['href']
                except (IndexError, KeyError):
                    # No link (or no href) in the album cell.
                    continue
                album = refdata.text.strip()

                # The code is whatever follows the last "-" plus two more
                # characters in the final URL segment, kept only if numeric.
                data = albumURL.split("/")[-1]
                discID = data[(data.rfind("-") + 3):]
                try:
                    int(discID)
                    code = discID
                except ValueError:
                    code = None

                amdc = artistDBMediaDataClass(album=album,
                                              url=albumURL,
                                              aclass=None,
                                              aformat=None,
                                              artist=None,
                                              code=code,
                                              year=year)
                if amc.media.get(mediaType) is None:
                    amc.media[mediaType] = []
                amc.media[mediaType].append(amdc)

        # Merge in compositions first, then songs.
        for getExtra in (self.getMediaCompositions, self.getMediaSongs):
            for mediaType, mediaTypeData in getExtra().items():
                if amc.media.get(mediaType) is None:
                    amc.media[mediaType] = []
                amc.media[mediaType] += mediaTypeData

        return amc