示例#1
0
    def getName(self):
        """Extract the artist name and native name from the page heading.

        Looks up ``<h1 class="artist_name_hdr">`` in ``self.bsdata``. When the
        heading contains a ``<span>``, the span text is used as the name and
        the heading text left after ``removeTag`` drops that span is used as
        the native name. Without a span, both values are the heading text.

        Returns:
            The result of ``artistDBNameClass(...)``:
            ``err="No H1"`` when the heading is missing, ``err="Fix"`` when
            the extracted name is empty, otherwise ``err=None`` with ``name``
            and ``native`` set.
        """
        header = self.bsdata.find("h1", {"class": "artist_name_hdr"})
        if header is None:
            return artistDBNameClass(err="No H1")

        nameSpan = header.find("span")
        if nameSpan is not None:
            name = nameSpan.text.strip()
            header = removeTag(header, nameSpan)
            native = header.text.strip()
        else:
            name = header.text.strip()
            native = name

        if not name:
            return artistDBNameClass(name=name, err="Fix")

        name = fixName(name)
        native = fixName(native)

        # A name ending in "]" carries a bracketed qualifier; keep only the
        # text that precedes the first " [".
        if name.endswith("]"):
            name = name.split(" [")[0].strip()
        if native.endswith("]"):
            native = native.split(" [")[0].strip()

        return artistDBNameClass(name=name, native=native, err=None)
示例#2
0
    def getName(self):
        """Read the artist name from a JSON object embedded in a ``<meta>`` tag.

        Scans the ``<meta>`` elements of ``self.bsdata`` and uses the first
        ``content`` attribute that both looks like a JSON object (starts with
        ``{`` and ends with ``}``) and parses successfully. The name is taken
        from ``["artist"]["name"]`` and split with ``self.splitNativeName``.

        Returns:
            The result of ``artistDBNameClass(...)``:
            ``err="NoJSON"`` when no parsable JSON payload is found,
            ``err="BadJSON"`` when the payload lacks ``artist.name``,
            otherwise ``err=None`` with ``name`` and ``native`` set.
        """
        jdata = None
        for meta in self.bsdata.findAll("meta"):
            # A <meta> tag without a "content" attribute (for example
            # <meta charset="...">) must be skipped instead of raising KeyError.
            content = meta.attrs.get('content')
            if not isinstance(content, str):
                continue
            if not (content.startswith("{") and content.endswith("}")):
                continue
            try:
                jdata = json.loads(content)
            except ValueError:
                # Looked like JSON but is not; try the next <meta> tag.
                continue
            break

        if jdata is None:
            return artistDBNameClass(name=None, err="NoJSON")

        try:
            artistName = jdata['artist']['name']
        except (KeyError, TypeError):
            return artistDBNameClass(name=None, err="BadJSON")

        latinName, nativeName = self.splitNativeName(artistName)
        return artistDBNameClass(name=latinName, native=nativeName, err=None)
示例#3
0
    def getName(self):
        """Extract the artist name from the headline or the ld+json script.

        The raw name comes from ``<h1 class="artistHeadline">`` when present,
        otherwise from the ``"name"`` field of the first
        ``<script type="application/ld+json">`` element. The raw name is then
        cleaned in a single pass shared by both sources:

        * if the name contains bracketed values, every `` [tag]`` suffix from
          the ignore list is removed;
        * if bracketed values still remain, the name is replaced by those
          values joined with ``" & "``.

        Returns:
            The result of ``artistDBNameClass(...)``:
            ``err="NoJSON"`` when neither the headline nor the script exists,
            ``err="CouldNotCompileJSON"`` when the script content cannot be
            parsed or has no ``"name"``, otherwise ``err=None`` with ``name``.
        """
        h1 = self.bsdata.find("h1", {"class": 'artistHeadline'})
        if h1 is not None:
            artistName = h1.text
        else:
            script = self.bsdata.find("script",
                                      {"type": "application/ld+json"})
            if script is None:
                return artistDBNameClass(name=None, err="NoJSON")

            try:
                artistName = json.loads(script.contents[0])["name"]
            except (IndexError, KeyError, TypeError, ValueError):
                return artistDBNameClass(name=None, err="CouldNotCompileJSON")

        # Bracketed qualifiers that are dropped rather than treated as a name.
        ignores = (
            'rap', '2', '3', '4', 'NOR', 'US', 'unknown Artist', 'CHE',
            'email\xa0protected', '70s', '60s', '80s', '90s', 'BRA',
            'SWE', 'France', 'FR', 'UK', 'JP', 'DE', 'USA', 'RUS',
            'ARG', 'DEU'
        )
        bracketPattern = r'\[(.*?)\]+'

        bracketValues = regex.findall(bracketPattern, artistName)
        if len(bracketValues) > 0:
            for ignore in ignores:
                artistName = artistName.replace(" [{0}]".format(ignore), "")
            # Re-scan: only brackets that survived the ignore list count.
            bracketValues = regex.findall(bracketPattern, artistName)

        if len(bracketValues) > 0:
            artistName = " & ".join(bracketValues)

        return artistDBNameClass(name=artistName, err=None)
示例#4
0
    def getName(self):
        """Extract the artist name from the ``naboo_artist_name`` heading.

        Reads the text of ``<span itemprop="name">`` inside
        ``<h1 id="naboo_artist_name">`` in ``self.bsdata``.

        Returns:
            The result of ``artistDBNameClass(...)``:
            ``err="NoH1"`` when the heading is missing, ``err="NoSpan"`` when
            the span is missing, otherwise ``err=None`` with ``name`` set to
            the span text (not stripped).
        """
        heading = self.bsdata.find("h1", {"id": "naboo_artist_name"})
        if heading is None:
            return artistDBNameClass(name=None, err="NoH1")

        nameSpan = heading.find("span", {"itemprop": "name"})
        if nameSpan is None:
            return artistDBNameClass(name=None, err="NoSpan")

        return artistDBNameClass(name=nameSpan.text, err=None)
示例#5
0
    def getName(self):
        """Extract the artist name from the ``artist-info`` section.

        Reads the text of the first ``<h1>`` inside
        ``<section id="artist-info">`` in ``self.bsdata``.

        Returns:
            The result of ``artistDBNameClass(...)``:
            ``err=True`` when the section is missing, ``err="NoH1"`` when it
            has no heading, otherwise ``err=None`` with ``name`` set to the
            heading text (not stripped).
        """
        section = self.bsdata.find("section", {"id": "artist-info"})
        if section is None:
            return artistDBNameClass(err=True)

        heading = section.find("h1")
        if heading is None:
            return artistDBNameClass(err="NoH1")

        return artistDBNameClass(name=heading.text, err=None)
示例#6
0
 def getName(self):
     """Extract the artist name from ``<strong class="pagetitle">``.

     The literal suffix " - Spotify Chart History" is removed from the
     title text.

     Returns:
         The result of ``artistDBNameClass(name=..., err=None)``; ``name``
         is ``None`` when the title element is missing.
     """
     titleTag = self.bsdata.find("strong", {"class": "pagetitle"})
     if titleTag is None:
         name = None
     else:
         name = titleTag.text.replace(" - Spotify Chart History", "")
     return artistDBNameClass(name=name, err=None)
示例#7
0
 def getName(self):
     """Extract the artist name from ``<span class="pagetitle">``.

     The name is the stripped text preceding the first " | " separator.

     Returns:
         The result of ``artistDBNameClass(name=..., err=None)``; ``name``
         is ``None`` when the title element is missing.
     """
     titleTag = self.bsdata.find("span", {"class": "pagetitle"})
     if titleTag is None:
         name = None
     else:
         leading = titleTag.text.split(" | ")[0]
         name = leading.strip()
     return artistDBNameClass(name=name, err=None)
示例#8
0
    def getName(self):
        """Extract the artist name from the ``artistheader`` block.

        Finds the first ``<h1>`` inside ``<div class="artistheader">`` in
        ``self.bsdata``, passes it to ``self.getNamesAndURLs`` and uses the
        ``name`` attribute of the first returned entry.

        Returns:
            The result of ``artistDBNameClass(...)``:
            ``err=True`` when the header div is missing, ``err="NoH1"`` when
            it has no heading, ``err="TxtErr"`` when no name can be read from
            the parsed references, otherwise ``err=None`` with ``name`` set.
        """
        artistData = self.bsdata.find("div", {"class": "artistheader"})
        if artistData is None:
            return artistDBNameClass(err=True)

        h1 = artistData.find("h1")
        if h1 is None:
            # Return immediately: without this the "NoH1" result was
            # discarded and getNamesAndURLs() was called with None.
            return artistDBNameClass(err="NoH1")

        ref = self.getNamesAndURLs(h1)
        try:
            artistName = ref[0].name
        except (AttributeError, IndexError, KeyError, TypeError):
            return artistDBNameClass(err="TxtErr")

        return artistDBNameClass(name=artistName, err=None)
示例#9
0
    def getName(self):
        """Extract the artist name from the artist-bio containers.

        Examines each ``<div class="artist-bio-container">`` in
        ``self.bsdata`` in document order and returns as soon as one yields a
        non-empty ``<h1 class="artist-name">``. A later container without a
        heading therefore cannot overwrite a name that was already found.

        Returns:
            The result of ``artistDBNameClass(...)``:
            ``err=True`` when there are no containers; ``err=None`` with the
            name passed through ``fixName`` for the first usable heading;
            otherwise the failure from the last container examined
            (``err="NoH1"`` for a missing heading, or ``name="?"`` with
            ``err="Fix"`` for an empty one).
        """
        artistBios = self.bsdata.findAll("div",
                                         {"class": "artist-bio-container"})
        if len(artistBios) == 0:
            return artistDBNameClass(err=True)

        anc = None
        for div in artistBios:
            h1 = div.find("h1", {"class": "artist-name"})
            if h1 is None:
                anc = artistDBNameClass(err="NoH1")
                continue

            artistName = h1.text.strip()
            if len(artistName) > 0:
                # First usable name wins; stop scanning.
                return artistDBNameClass(name=fixName(artistName), err=None)

            anc = artistDBNameClass(name="?", err="Fix")

        return anc
示例#10
0
    def getName(self):
        """Extract the artist name from the page heading, with a JSON fallback.

        Prefers ``<h1 class="hide_desktop">`` and falls back to
        ``<h1 class="hide_mobile">``. When the chosen heading has empty text,
        the name is read from the ``"name"`` field of the JSON held in
        ``<script id="artist_schema">``.

        Returns:
            The result of ``artistDBNameClass(...)``:
            ``err=True`` when neither heading exists; ``err=None`` with the
            name passed through ``fixName`` on success; ``err="Fix"`` when
            the heading is empty and there is no schema script;
            ``err="JSON"`` when the schema script cannot be used.
        """
        result1 = self.bsdata.find("h1", {'class': 'hide_desktop'})
        result2 = self.bsdata.find("h1", {'class': 'hide_mobile'})
        result = result1 or result2
        if not result:
            return artistDBNameClass(err=True)

        artist = result.text
        if len(artist) > 0:
            return artistDBNameClass(name=fixName(artist), err=None)

        # The heading is empty: fall back to the embedded schema JSON.
        schema = self.bsdata.find("script", {"id": "artist_schema"})
        if schema is None:
            return artistDBNameClass(name=artist, err="Fix")

        try:
            artist = fixName(json.loads(schema.text)["name"])
        except Exception:
            # Best effort: any parse/lookup/fixName failure is reported as a
            # "JSON" error; `artist` is still the empty heading text here.
            return artistDBNameClass(name=artist, err="JSON")

        return artistDBNameClass(name=artist, err=None)
示例#11
0
    def getName(self):
        """Extract the artist name from the ``initial-state`` JSON script.

        Parses the first child of ``<script data-name="initial-state">`` as
        JSON and reads ``"artistName"`` from the first entry of
        ``jdata["artists"]["artists"]``.

        Returns:
            The result of ``artistDBNameClass(...)``:
            ``err="NoJSON"`` when the script is missing, ``err="BadJSON"``
            when its content cannot be parsed, ``err="NoArtistName"`` when
            the expected structure or name is absent, otherwise ``err=None``
            with ``name`` set.
        """
        script = self.bsdata.find("script", {"data-name": "initial-state"})
        if script is None:
            return artistDBNameClass(name=None, err="NoJSON")

        try:
            jdata = json.loads(script.contents[0])
        except (IndexError, TypeError, ValueError):
            return artistDBNameClass(name=None, err="BadJSON")

        # Only the first artist entry is consulted. A payload without the
        # expected nesting is reported as a missing name instead of raising.
        try:
            firstArtist = next(iter(jdata['artists']['artists'].values()), None)
            artistName = None if firstArtist is None else firstArtist["artistName"]
        except (AttributeError, KeyError, TypeError):
            artistName = None

        if artistName is not None:
            return artistDBNameClass(name=artistName, err=None)

        return artistDBNameClass(name=None, err="NoArtistName")
示例#12
0
    def getName(self):
        """Extract the artist name from the page's tealium data attribute.

        Reads the ``data-tealium-data`` attribute from
        ``<script id="initial-tealium-data">`` or, failing that, from
        ``<div id="tlmdata">``, parses it as JSON and returns its
        ``"musicArtistName"`` value.

        Returns:
            The result of ``artistDBNameClass(...)``:
            ``err="NoTealiumData"`` when neither element provides the
            attribute, ``err="NoArtistName"`` when the attribute is not valid
            JSON or lacks ``"musicArtistName"``, otherwise ``err=None`` with
            ``name`` set.
        """
        lookups = (
            ("script", {"id": 'initial-tealium-data'}),
            ("div", {"id": "tlmdata"}),
        )

        artistdata = None
        for tagName, attrs in lookups:
            tag = self.bsdata.find(tagName, attrs)
            if tag is None:
                continue
            artistdata = tag.attrs.get('data-tealium-data')
            if artistdata is not None:
                break

        if artistdata is None:
            # Report the missing source explicitly; previously this result
            # was built but never returned.
            return artistDBNameClass(name=None, err="NoTealiumData")

        try:
            artist = json.loads(artistdata)["musicArtistName"]
        except (KeyError, TypeError, ValueError):
            return artistDBNameClass(name=None, err="NoArtistName")

        return artistDBNameClass(name=artist, err=None)
示例#13
0
    def getData(self, inputdata):
        """Build the artist record from parsed LastFM API data.

        Calls ``self.getDataBase(inputdata)`` and ``self.checkData()`` first.
        If ``self.dbdata`` is set afterwards it is returned unchanged.
        Otherwise ``self.bsdata`` must be a dict with ``"Tracks"`` and
        ``"Albums"`` sequences of dict-like items.

        Artist name, URL and MusicBrainz ID are copied from the first track,
        or from the first album when there are no tracks. Tracks with a count
        above 50 and albums with a count above 25 are kept, each list trimmed
        to the entries whose count is at least the 1000th-highest count (ties
        at the cut-off are all kept).

        Args:
            inputdata: Forwarded unchanged to ``self.getDataBase``.

        Returns:
            ``self.dbdata`` when it is already available, ``None`` when there
            are neither tracks nor albums, otherwise the result of
            ``artistDBDataClass(...)``.

        Raises:
            ValueError: If ``self.bsdata`` is not a dict.
        """
        self.getDataBase(inputdata)
        self.checkData()

        if self.dbdata is not None:
            return self.dbdata
        if not isinstance(self.bsdata, dict):
            raise ValueError("Could not parse LastFM API data")

        tracks = self.bsdata["Tracks"]
        albums = self.bsdata["Albums"]

        if len(tracks) > 0:
            first = tracks[0]
        elif len(albums) > 0:
            first = albums[0]
        else:
            # Nothing to build a record from.
            return None

        artistName = first["artistName"]
        artistURL = first["artistURL"]
        artistMBID = first["artistMBID"]
        artistID = self.dbUtils.getArtistID(artistURL)
        generalData = None
        externalData = {"MusicBrainzID": artistMBID}

        def selectTop(items, minCounts, maxItems=1000):
            # Keep items whose count exceeds minCounts, then trim to those at
            # or above the maxItems-th highest count.
            entries = [{
                "Name": item["name"],
                "URL": item["URL"],
                "Counts": int(item["counts"])
            } for item in items if int(item["counts"]) > minCounts]
            if not entries:
                return entries
            counts = sorted((entry["Counts"] for entry in entries),
                            reverse=True)
            cutoff = counts[min(len(counts) - 1, maxItems - 1)]
            return [entry for entry in entries if entry["Counts"] >= cutoff]

        def buildMedia(entries):
            # Wrap each entry in artistDBMediaDataClass with a numeric code
            # derived from the MD5 of its name and URL (modulo 10**7).
            media = []
            for entry in entries:
                m = md5()
                m.update(entry['Name'].encode('utf-8'))
                m.update(entry['URL'].encode('utf-8'))
                code = str(int(m.hexdigest(), 16) % int(1e7))
                media.append(
                    artistDBMediaDataClass(album=entry["Name"],
                                           url=entry["URL"],
                                           artist=[artistName],
                                           code=code,
                                           year=None))
            return media

        trackData = selectTop(tracks, 50)
        albumData = selectTop(albums, 25)

        mediaData = {}
        if len(trackData) > 0:
            mediaData["Tracks"] = buildMedia(trackData)
        if len(albumData) > 0:
            mediaData["Albums"] = buildMedia(albumData)

        artist = artistDBNameClass(name=artistName, err=None)
        meta = artistDBMetaClass(title=None, url=artistURL)
        url = artistDBURLClass(url=artistURL)
        ID = artistDBIDClass(ID=artistID)
        pages = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
        profile = artistDBProfileClass(general=generalData,
                                       external=externalData)
        media = artistDBMediaClass()
        media.media = mediaData
        mediaCounts = self.getMediaCounts(media)
        info = self.getInfo()

        adc = artistDBDataClass(artist=artist,
                                meta=meta,
                                url=url,
                                ID=ID,
                                pages=pages,
                                profile=profile,
                                mediaCounts=mediaCounts,
                                media=media,
                                info=info)

        return adc
示例#14
0
    def getData(self, inputdata):
        """Build the artist record from parsed Spotify API data.

        Calls ``self.getDataBase(inputdata)`` and ``self.checkData()`` first.
        If ``self.dbdata`` is set afterwards it is returned unchanged.
        Otherwise ``self.bsdata`` must be a dict with ``"Artist"`` and
        ``"Albums"`` entries. The artist ID is read from the ``name``
        attribute of the artist entry (presumably a pandas Series indexed by
        artist ID; confirm against the producer of ``bsdata``).

        Albums are grouped under a media name built from ``album_group`` and
        ``album_type``: both joined with " + " when both are truthy,
        otherwise whichever is not ``None``, otherwise "Unknown".

        Args:
            inputdata: Forwarded unchanged to ``self.getDataBase``.

        Returns:
            ``self.dbdata`` when it is already available, otherwise the
            result of ``artistDBDataClass(...)``.

        Raises:
            ValueError: If ``self.bsdata`` is not a dict, or if the albums
                entry reports a different artist ID than the artist entry.
        """
        self.getDataBase(inputdata)
        self.checkData()

        if self.dbdata is not None:
            return self.dbdata
        if not isinstance(self.bsdata, dict):
            raise ValueError("Could not parse Spotify API data")

        artistData = self.bsdata['Artist']
        artistID = artistData.name
        artistURI = artistData.get('uri')
        artistType = artistData.get('stype')
        artistPopularity = artistData.get('popularity')
        artistName = artistData.get('name')
        artistAPIURL = artistData.get('href')
        artistGenres = artistData.get('genres', [])
        artistFollowers = artistData.get('followers')
        artistURL = artistData.get('urls', {}).get('spotify')

        generalData = {"Type": artistType}
        genresData = artistGenres if len(artistGenres) > 0 else None
        externalData = {'SpotifyAPI': {"URL": artistAPIURL, "URI": artistURI}}
        extraData = {
            'Followers': artistFollowers,
            "Popularity": artistPopularity
        }

        mediaData = {}
        albumsData = self.bsdata['Albums']
        if len(albumsData) > 0:
            if albumsData.get('artistID') != artistID:
                raise ValueError(
                    "ArtistIDs do not match for Spotify API Data! [{0}, {1}]".
                    format(albumsData.get('artistID'), artistID))

            for albumData in albumsData.get('albums', []):
                albumID = albumData.get('sid')
                albumGroup = albumData.get('album_group')
                albumType = albumData.get('album_type')
                albumSType = albumData.get('stype')
                albumArtists = [{
                    albumArtist['sid']: albumArtist['name']
                } for albumArtist in albumData.get('artists', [])]
                albumURL = albumData.get('urls', {}).get('spotify')
                albumURI = albumData.get('uri')
                albumAPI = albumData.get('href')
                albumName = albumData.get('name')
                albumTracks = albumData.get('numtracks')
                albumDate = albumData.get('date')

                # Best effort: an unparsable release date yields no year, but
                # interpreter-level exits (KeyboardInterrupt, SystemExit) are
                # no longer swallowed.
                try:
                    albumYear = to_datetime(
                        albumDate).year if albumDate is not None else None
                except Exception:
                    albumYear = None

                if all([albumGroup, albumType]):
                    mediaName = " + ".join([albumGroup, albumType])
                elif albumGroup is not None:
                    mediaName = albumGroup
                elif albumType is not None:
                    mediaName = albumType
                else:
                    mediaName = "Unknown"

                amdc = artistDBMediaDataClass(album=albumName,
                                              url=albumURL,
                                              artist=albumArtists,
                                              code=albumID,
                                              year=albumYear,
                                              aclass=albumSType,
                                              aformat={
                                                  "URI": albumURI,
                                                  "API": albumAPI,
                                                  "Date": albumDate,
                                                  "NumTracks": albumTracks
                                              })
                mediaData.setdefault(mediaName, []).append(amdc)

        artist = artistDBNameClass(name=artistName, err=None)
        meta = artistDBMetaClass(title=None, url=artistURL)
        url = artistDBURLClass(url=artistURL)
        ID = artistDBIDClass(ID=artistID)
        pages = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
        profile = artistDBProfileClass(general=generalData,
                                       external=externalData,
                                       extra=extraData,
                                       genres=genresData)
        media = artistDBMediaClass()
        media.media = mediaData
        mediaCounts = self.getMediaCounts(media)
        info = self.getInfo()

        adc = artistDBDataClass(artist=artist,
                                meta=meta,
                                url=url,
                                ID=ID,
                                pages=pages,
                                profile=profile,
                                mediaCounts=mediaCounts,
                                media=media,
                                info=info)

        return adc
示例#15
0
    def getData(self, inputdata):
        """Build the artist record from parsed Deezer API data.

        Calls ``self.getDataBase(inputdata)`` and ``self.checkData()`` first.
        If ``self.dbdata`` is set afterwards it is returned unchanged.
        Otherwise ``self.bsdata`` must be a dict with the keys ``"Tracks"``,
        ``"Albums"``, ``"Name"``, ``"ID"``, ``"URL"`` and ``"Type"``, where
        tracks and albums are mappings from code to an entry holding
        ``"Name"`` and ``"URL"``.

        Both the "Tracks" and the "Albums" lists are stored in the resulting
        media mapping.

        Args:
            inputdata: Forwarded unchanged to ``self.getDataBase``.

        Returns:
            ``self.dbdata`` when it is already available, otherwise the
            result of ``artistDBDataClass(...)``.

        Raises:
            ValueError: If ``self.bsdata`` is not a dict.
        """
        self.getDataBase(inputdata)
        self.checkData()

        if self.dbdata is not None:
            return self.dbdata
        if not isinstance(self.bsdata, dict):
            raise ValueError("Could not parse Deezer API data")

        artistInfo = self.bsdata

        artistTracks = artistInfo["Tracks"]
        artistAlbums = artistInfo["Albums"]
        artistName = artistInfo["Name"]
        artistID = artistInfo["ID"]
        artistURL = artistInfo["URL"]
        generalData = {"Type": artistInfo["Type"]}

        # One shared dict for both media kinds; it must not be reset between
        # them or the "Tracks" entries are lost.
        mediaData = {}
        for mediaName, items in (("Tracks", artistTracks),
                                 ("Albums", artistAlbums)):
            mediaData[mediaName] = []
            for code, item in items.items():
                amdc = artistDBMediaDataClass(album=item["Name"],
                                              url=item["URL"],
                                              artist=[artistName],
                                              code=code,
                                              year=None)
                mediaData[mediaName].append(amdc)

        artist = artistDBNameClass(name=artistName, err=None)
        meta = artistDBMetaClass(title=None, url=artistURL)
        url = artistDBURLClass(url=artistURL)
        ID = artistDBIDClass(ID=artistID)
        pages = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
        profile = artistDBProfileClass(general=generalData)
        media = artistDBMediaClass()
        media.media = mediaData
        mediaCounts = self.getMediaCounts(media)
        info = self.getInfo()

        adc = artistDBDataClass(artist=artist,
                                meta=meta,
                                url=url,
                                ID=ID,
                                pages=pages,
                                profile=profile,
                                mediaCounts=mediaCounts,
                                media=media,
                                info=info)

        return adc
示例#16
0
    def getData(self, inputdata):
        """Build the artist record from parsed Discogs API data.

        Calls ``self.getDataBase(inputdata)`` and ``self.checkData()`` first.
        If ``self.dbdata`` is set afterwards it is returned unchanged.
        Otherwise ``self.bsdata`` must be a dict with ``"Artist"`` and
        ``"Albums"`` entries. The artist ID is read from the ``name``
        attribute of the artist entry (presumably a pandas Series indexed by
        artist ID; confirm against the producer of ``bsdata``).

        A list of albums is grouped by ``self.getMediaType(item)``; a dict of
        albums is used as the media mapping as-is.

        Args:
            inputdata: Forwarded unchanged to ``self.getDataBase``.

        Returns:
            ``self.dbdata`` when it is already available, otherwise the
            result of ``artistDBDataClass(...)``.

        Raises:
            ValueError: If ``self.bsdata`` is not a dict, or if the albums
                entry is neither a list nor a dict.
        """
        self.getDataBase(inputdata)
        self.checkData()

        if self.dbdata is not None:
            return self.dbdata
        if not isinstance(self.bsdata, dict):
            raise ValueError("Could not parse Discogs API data")

        artistData = self.bsdata["Artist"]
        albumsData = self.bsdata["Albums"]

        artistID = artistData.name
        artistName = artistData["name"]
        artistURL = "https://www.discogs.com/artist/{0}".format(artistID)

        # Collect the profile fields, dropping the ones that are None; an
        # entirely empty profile is stored as None.
        profileFields = {
            "RealName": artistData["realname"],
            "Aliases": artistData["MasterAliases"],
            "Groups": artistData["MasterGroups"],
            "Members": artistData["MasterMembers"],
            "Variations": artistData["MasterNameVariations"],
        }
        generalData = {
            key: value
            for key, value in profileFields.items() if value is not None
        }
        if len(generalData) == 0:
            generalData = None

        ########################################################################
        # Get Releases
        ########################################################################
        if isinstance(albumsData, list):
            mediaData = {}
            for item in albumsData:
                amdc = artistDBMediaDataClass(
                    album=item.get('name'),
                    url=item.get('url'),
                    artist=item.get('artist'),
                    code=item.get('id'),
                    aformat=item.get('format'),
                    aclass={
                        "Label": item.get('label'),
                        "Main": item.get('main_release')
                    },
                    year=item.get('year'))
                mediaName = self.getMediaType(item)
                mediaData.setdefault(mediaName, []).append(amdc)
        elif isinstance(albumsData, dict):
            mediaData = albumsData
        else:
            raise ValueError("Not sure how to process albums [{0}]".format(albumsData))

        artist = artistDBNameClass(name=artistName, err=None)
        meta = artistDBMetaClass(title=None, url=artistURL)
        url = artistDBURLClass(url=artistURL)
        ID = artistDBIDClass(ID=artistID)
        pages = artistDBPageClass(ppp=1, tot=1, redo=False, more=False)
        profile = artistDBProfileClass(general=generalData)
        media = artistDBMediaClass()
        media.media = mediaData
        mediaCounts = self.getMediaCounts(media)
        info = artistDBFileInfoClass(info=None)

        return artistDBDataClass(artist=artist,
                                 meta=meta,
                                 url=url,
                                 ID=ID,
                                 pages=pages,
                                 profile=profile,
                                 mediaCounts=mediaCounts,
                                 media=media,
                                 info=info)