Пример #1
0
    def getAlsoKnownAs(self, tag):
        """Collect the alternative names listed in an "Also Known As" field.

        Args:
            tag: Wrapper whose ``getTag()`` returns the parsed markup of the
                field, or ``None`` when the field is absent.

        Returns:
            ``None`` when ``tag`` is ``None``; otherwise a list holding one
            ``artistDBLinkClass`` per ``<a>`` element and one
            ``artistDBTextClass`` per non-empty plain-text name.
        """
        if tag is None:
            return None

        # Example of the markup handled here:
        #   <div class="info_content"><span class="rendered_text">Dwight David
        #   Turner [birth name], <a class="artist" href="/artist/dwight_david"
        #   title="[Artist864564]">Dwight David</a>, Spider Turner</span></div>
        node = tag.getTag()
        span = node.find("span", {"class": "rendered_text"})
        retval = []

        if span is not None:
            for ref in span.findAll("a"):
                retval.append(artistDBLinkClass(ref))
                # NOTE(review): removeTag is assumed to return the span with
                # `ref` taken out, so only plain-text names remain -- confirm.
                span = removeTag(span, ref)

            # Removing a link leaves an empty fragment between its commas
            # (", ,"); skip those instead of recording blank names.
            for piece in span.text.split(","):
                name = piece.strip()
                if name:
                    retval.append(artistDBTextClass(name))
            return retval

        refs = node.findAll("a")
        if len(refs) > 0:
            retval.extend(artistDBLinkClass(ref) for ref in refs)
            return retval

        # No links at all: record the field's own text as a single name.
        # The text is read explicitly because a parsed tag has no usable
        # ``strip()`` of its own.
        text = getattr(node, "text", None)
        if isinstance(text, str) and text.strip():
            retval.append(artistDBTextClass(text.strip()))
        return retval
Пример #2
0
    def getProfile(self):
        """Build the artist profile from the parsed page in ``self.bsdata``.

        General data comes from the ``div.profile`` block, whose ``div.head``
        and ``div.content`` children are paired by position. Extra data comes
        from the links inside every ``ul.facets_nav`` list, grouped by their
        ``data-credit-type`` and ``data-credit-subtype`` attributes.

        Returns:
            An ``artistDBProfileClass`` built with ``general`` (a dict, empty
            when the profile block is missing or its heads and contents do
            not line up) and ``extra`` (a nested dict, or ``None`` when no
            credit links were found).
        """
        generalData = {}
        profile = self.bsdata.find("div", {"class": "profile"})
        # A page without a profile block simply yields no general data.
        if profile is not None:
            heads = profile.findAll("div", {"class": "head"})
            bodies = profile.findAll("div", {"class": "content"})
            # Heads and bodies are matched by position, so a count mismatch
            # makes the pairing meaningless and everything is dropped.
            if len(heads) == len(bodies):
                for head, body in zip(heads, bodies):
                    # The last character of the heading is dropped
                    # (presumably a trailing colon -- TODO confirm).
                    key = head.text[:-1] if isinstance(head.text, str) else None
                    refs = body.findAll("a")
                    if len(refs) == 0:
                        generalData[key] = [artistDBTextClass(body)]
                    else:
                        generalData[key] = [artistDBLinkClass(ref) for ref in refs]

        extraData = {}
        for ul in self.bsdata.findAll("ul", {"class": "facets_nav"}):
            for ref in ul.findAll("a"):
                dctype = ref.attrs.get('data-credit-type')
                dcsubtype = ref.attrs.get('data-credit-subtype')
                # Only links carrying both attributes are credit facets.
                if not (dctype and dcsubtype):
                    continue
                extraData.setdefault(dctype, {})[dcsubtype] = artistDBLinkClass(ref)
        extraData = extraData if len(extraData) > 0 else None

        apc = artistDBProfileClass(general=generalData, extra=extraData)
        return apc
Пример #3
0
    def getProfile(self):
        """Build the artist profile from the parsed page in ``self.bsdata``.

        Every ``div.detailRow`` inside ``div.artistTopBox info`` that has a
        ``<span>`` label and at least one link contributes its links: the
        "Genres" row becomes the genre data, any other row is stored in the
        general data under its label (a leading "/" is stripped from the
        label). The first link of every ``div.artistBlock`` inside
        ``div.relatedArtists`` becomes the extra data.

        Returns:
            An ``artistDBProfileClass`` built with ``general`` (dict or
            ``None`` when empty), ``genres`` (list or ``None``), ``tags``
            (always ``None``) and ``extra`` (list, or ``None`` when the
            related-artists block is missing).
        """
        generalData = {}
        genreData = None
        extraData = None
        tagsData = None

        infoBox = self.bsdata.find("div", {"class": "artistTopBox info"})
        rows = []
        if infoBox is not None:
            rows = infoBox.findAll("div", {"class": "detailRow"})

        for row in rows:
            label = row.find("span")
            if label is None:
                continue

            key = None
            if label.text is not None:
                key = label.text.strip()
            if isinstance(key, str) and key.startswith("/"):
                key = key[1:].strip()

            anchors = row.findAll("a")
            if len(anchors) == 0:
                # Rows without links carry nothing to record.
                continue
            if isinstance(anchors, list):
                vals = [artistDBLinkClass(anchor) for anchor in anchors]
            else:
                vals = None

            if key == "Genres":
                genreData = vals
            else:
                generalData[key] = vals

        relatedBox = self.bsdata.find("div", {"class": "relatedArtists"})
        if relatedBox is not None:
            blocks = relatedBox.findAll("div", {"class": "artistBlock"})
            if blocks is not None:
                firstLinks = [block.find("a") for block in blocks]
                extraData = []
                for anchor in firstLinks:
                    if anchor is not None:
                        extraData.append(artistDBLinkClass(anchor))

        if len(generalData) == 0:
            generalData = None

        return artistDBProfileClass(general=generalData,
                                    genres=genreData,
                                    tags=tagsData,
                                    extra=extraData)
Пример #4
0
    def getMedia(self, artist):
        """Collect the media entries listed on the parsed page.

        Each ``div.albumBlock`` in ``self.bsdata`` is read as a set of child
        ``<div>`` elements keyed by their first CSS class. The "image" entry
        supplies the URL (when it is a link), while "albumTitle", "date" and
        "type" supply the title, year and media type (when they are plain
        text).

        Args:
            artist: Value stored as the ``artist`` of every media entry.

        Returns:
            An ``artistDBMediaClass`` whose ``media`` mapping groups the
            ``artistDBMediaDataClass`` entries by media type.
        """
        amc = artistDBMediaClass()

        for albumBlock in self.bsdata.findAll("div", {"class": "albumBlock"}):
            blockData = {}
            for div in albumBlock.findAll("div"):
                attr = div.attrs.get("class")
                key = attr[0] if isinstance(attr, list) else None
                ref = div.find("a")
                # A div holding a link is stored as a link, otherwise as text.
                if ref is not None:
                    blockData[key] = artistDBLinkClass(ref)
                else:
                    blockData[key] = artistDBTextClass(div)

            urlData = blockData.get("image")
            url = urlData.href if isinstance(urlData,
                                             artistDBLinkClass) else None

            titleData = blockData.get("albumTitle")
            title = titleData.text if isinstance(titleData,
                                                 artistDBTextClass) else None

            yearData = blockData.get("date")
            year = yearData.text if isinstance(yearData,
                                               artistDBTextClass) else None

            mediaTypeData = blockData.get("type")
            mediaType = mediaTypeData.text if isinstance(
                mediaTypeData, artistDBTextClass) else None

            code = self.dutils.getAlbumCode(name=title, url=url)
            # Use the caller-supplied artist rather than a fixed name.
            amdc = artistDBMediaDataClass(album=title,
                                          url=url,
                                          aclass=None,
                                          aformat=None,
                                          artist=artist,
                                          code=code,
                                          year=year)

            if amc.media.get(mediaType) is None:
                amc.media[mediaType] = []
            amc.media[mediaType].append(amdc)
            if self.debug:
                print("\t\tAdding Media ({0} -- {1})".format(title, url))

        return amc
Пример #5
0
    def getProfile(self):
        """Build the artist profile from the parsed page in ``self.bsdata``.

        The ``div.artist_info`` block is read as ``info_hdr`` / ``info_content``
        pairs keyed by the header text. For each known field the value is the
        list of links found in its content, or a single text entry when the
        content has no links. Links to user lists under ``ul.lists`` are
        reported as external data.

        Returns:
            ``artistDBProfileClass(err="NoInfo")`` when the page has no
            ``artist_info`` block; otherwise an ``artistDBProfileClass``
            built with ``general`` (dict or ``None`` when empty), ``genres``
            (list or ``None``), ``extra`` (dict with "Member of" and/or
            "Related Artists", or ``None``) and ``external``
            (``{"Lists": [...]}`` or ``None``).
        """
        profile = self.bsdata.find("div", {"class": "artist_info"})
        if profile is None:
            return artistDBProfileClass(err="NoInfo")

        headers = [
            header.text
            for header in profile.findAll("div", {"class": "info_hdr"})
        ]
        content = profile.findAll("div", {"class": "info_content"})
        profileData = dict(zip(headers, content))

        def fieldValues(name):
            """Return the entries of field ``name``, or None when absent."""
            tag = profileData.get(name)
            if tag is None:
                return None
            refs = tag.findAll("a")
            if len(refs) == 0:
                return [artistDBTextClass(tag)]
            return [artistDBLinkClass(ref) for ref in refs]

        # User lists: one link per list_info entry that has a named link.
        userLists = []
        rymList = self.bsdata.find("ul", {"class": "lists"})
        listInfos = rymList.findAll(
            "div", {"class": "list_info"}) if rymList is not None else []
        for userList in listInfos:
            listName = userList.find("div", {"class": "list_name"})
            # An entry without a list_name div has no link to record.
            listRef = listName.find("a") if listName is not None else None
            if listRef is not None:
                userLists.append(artistDBLinkClass(listRef))
        externalData = {"Lists": userLists} if len(userLists) > 0 else None

        generalData = {}
        for name in ("Formed", "Disbanded", "Members", "Also Known As",
                     "Born", "Currently", "Notes"):
            values = fieldValues(name)
            if values is not None:
                generalData[name] = values

        # Both relationship fields share one dict so that neither overwrites
        # the other when a page has both.
        extraData = None
        for name in ("Member of", "Related Artists"):
            values = fieldValues(name)
            if values is not None:
                if extraData is None:
                    extraData = {}
                extraData[name] = values

        genreData = fieldValues("Genres")

        generalData = generalData if len(generalData) > 0 else None

        apc = artistDBProfileClass(general=generalData,
                                   genres=genreData,
                                   extra=extraData,
                                   external=externalData)
        return apc
Пример #6
0
    def getProfile(self):
        """Build the artist profile from the parsed page in ``self.bsdata``.

        Sections read from the page:
          * ``dl.properties``: each ``<dd>`` with exactly one CSS class is
            stored in the general data under that class.
          * ``div.genre-list``: its links become the genres.
          * ``div#sidebar-tag-list``: its links become the tags.
          * ``ul.external_links``: each ``<li>`` with exactly one CSS class is
            stored in the external data under that class.
          * ``div.tabs``: its links, keyed by link text, become the extra
            data (only when every link has non-empty text).

        Returns:
            An ``artistDBProfileClass`` built from those five sections.
        """

        def collectByClass(container, childName):
            """Map single-class children of ``container`` to their entries."""
            collected = {}
            if container is None:
                return collected
            for child in container.findAll(childName):
                classes = child.attrs.get('class')
                if not (isinstance(classes, list) and len(classes) == 1):
                    continue
                anchors = child.findAll('a')
                if len(anchors) == 0:
                    entries = [artistDBTextClass(child)]
                else:
                    entries = [artistDBLinkClass(anchor) for anchor in anchors]
                collected[classes[0]] = entries
            return collected

        # Artist information
        properties = self.bsdata.find("dl", {"class": "properties"})
        artistInformation = collectByClass(properties, "dd")

        # Genres
        genreData = None
        genreList = self.bsdata.find("div", {"class": "genre-list"})
        if genreList is not None:
            genreData = [
                artistDBLinkClass(anchor) for anchor in genreList.findAll("a")
            ]

        # Tags
        tagData = None
        tagList = self.bsdata.find("div", {"id": "sidebar-tag-list"})
        if tagList is not None:
            tagData = [
                artistDBLinkClass(anchor) for anchor in tagList.findAll("a")
            ]

        # External links
        externalLinksList = self.bsdata.find("ul", {"class": "external_links"})
        externalLinks = collectByClass(externalLinksList, "li")

        # Extra: the page tabs, keyed by their link text
        extraData = None
        tabs = self.bsdata.find("div", {"class": "tabs"})
        if tabs is not None:
            anchors = tabs.findAll("a")
            if anchors is not None:
                tabLinks = [artistDBLinkClass(anchor) for anchor in anchors]
                keys = [link.text for link in tabLinks]
                # A single tab without text discards the whole mapping.
                if all(keys):
                    extraData = dict(zip(keys, tabLinks))

        return artistDBProfileClass(general=artistInformation,
                                    tags=tagData,
                                    genres=genreData,
                                    extra=extraData,
                                    external=externalLinks)
Пример #7
0
    def getProfile(self):
        """Build the artist profile from the parsed page in ``self.bsdata``.

        Sections read from the page:
          * ``div.metadata-and-wiki-row``: the ``<dt>`` / ``<dd>`` pairs of
            all its ``<dl>`` lists are merged into
            ``general["Metadata"]``.
          * ``div.wiki-column`` / ``div.wiki-block``: the text of each block
            and its de-duplicated links go to ``general["Wiki"]``.
          * Similar artists (``ol.catalogue-overview-similar-artists-full-width``,
            falling back to ``section.artist-similar-sidebar``) become the
            extra data.
          * ``section.catalogue-tags`` links become the tags.
          * ``section.external-links-section`` links become the external data.

        Returns:
            An ``artistDBProfileClass`` built with ``general`` (dict or
            ``None`` when empty), ``tags``, ``extra`` and ``external`` (each a
            list of links, or ``None`` when nothing was found).
        """

        def linksOrNone(refs):
            """Wrap a non-empty list of anchors as links, else return None."""
            if isinstance(refs, list) and len(refs) > 0:
                return [artistDBLinkClass(ref) for ref in refs]
            return None

        generalData = {}

        ##
        ## General
        ##
        metadata = self.bsdata.find("div", {"class": "metadata-and-wiki-row"})
        if metadata is not None:
            for dl in metadata.findAll("dl"):
                attrKeys = [dt.text for dt in dl.findAll("dt")]
                attrVals = []
                for dd in dl.findAll("dd"):
                    refs = dd.findAll("a")
                    attrVals.append(
                        [artistDBTextClass(dd)] if len(refs) ==
                        0 else [artistDBLinkClass(ref) for ref in refs])
                # Merge instead of assigning so that earlier <dl> lists are
                # not discarded when the row contains several of them.
                generalData.setdefault("Metadata", {}).update(
                    zip(attrKeys, attrVals))

        for wikicolumn in self.bsdata.findAll("div", {"class": "wiki-column"}):
            for wikiblock in wikicolumn.findAll("div", {"class": "wiki-block"}):
                refs = wikiblock.findAll("a")
                links = linksOrNone(refs)
                # NOTE(review): removeTag is assumed to strip `ref` from the
                # block in place, leaving only its plain text -- confirm.
                for ref in refs:
                    removeTag(wikiblock, ref)
                text = artistDBTextClass(wikiblock)
                if generalData.get("Wiki") is None:
                    generalData["Wiki"] = {"Text": [], "Refs": []}
                generalData["Wiki"]["Text"].append(text)
                # Add each block's links once; duplicates across blocks are
                # collapsed below.
                if links is not None:
                    generalData["Wiki"]["Refs"].extend(links)
        if generalData.get("Wiki") is not None:
            # De-duplicate links by (href, text), keeping first-seen order.
            keep = {(ref.href, ref.text): ref
                    for ref in generalData["Wiki"]["Refs"]}
            generalData["Wiki"]["Refs"] = list(keep.values())

        similarData = self.bsdata.find(
            "ol", {"class": "catalogue-overview-similar-artists-full-width"})
        if similarData is None:
            similarData = self.bsdata.find(
                "section", {"class": "artist-similar-sidebar"})
        lis = similarData.findAll("li") if similarData is not None else None
        refs = [li.find("a", {"class": "link-block-target"})
                for li in lis] if lis is not None else None
        extraData = linksOrNone(refs)

        ##
        ## Tags
        ##
        tags = self.bsdata.find("section", {"class": "catalogue-tags"})
        tagsData = linksOrNone(
            tags.findAll("a") if tags is not None else None)

        ##
        ## External
        ##
        external = self.bsdata.find("section",
                                    {"class": "external-links-section"})
        externalData = linksOrNone(
            external.findAll("a") if external is not None else None)

        generalData = generalData if len(generalData) > 0 else None

        apc = artistDBProfileClass(general=generalData,
                                   tags=tagsData,
                                   extra=extraData,
                                   external=externalData)
        return apc
Пример #8
0
    def getMediaSongs(self):
        """Collect the songs listed in the tables of the parsed page.

        Every ``<table>`` in ``self.bsdata`` whose first row has ``<th>``
        headers is scanned. Each following row with a ``td.title-composer``
        cell containing a ``div.title`` yields one entry: the title text, the
        title link (if any) and the links (or text) of ``div.composer`` as
        the artists.

        Returns:
            ``{"Songs": [artistDBMediaDataClass, ...]}``, or an empty dict
            when no song was found.
        """
        mediaType = "Songs"
        media = {}
        for table in self.bsdata.findAll("table"):
            trs = table.findAll("tr")
            # A table without rows has no header to inspect.
            if len(trs) == 0:
                continue

            headers = [th.text.strip() for th in trs[0].findAll("th")]
            if len(headers) == 0:
                continue

            for j, tr in enumerate(trs[1:]):
                tdTitle = tr.find("td", {"class": "title-composer"})
                divTitle = tdTitle.find(
                    "div", {"class": "title"}) if tdTitle is not None else None
                compTitle = tdTitle.find(
                    "div",
                    {"class": "composer"}) if tdTitle is not None else None

                songTitle = divTitle.text if divTitle is not None else None
                if songTitle is None:
                    continue
                songTitle = songTitle.strip()

                songURL = divTitle.find('a')
                songURL = artistDBLinkClass(
                    songURL) if songURL is not None else None

                songArtists = None
                if compTitle is not None:
                    refs = compTitle.findAll("a")
                    if len(refs) == 0:
                        songArtists = [artistDBTextClass(compTitle.text)]
                    else:
                        songArtists = [artistDBLinkClass(ref) for ref in refs]

                # The code is derived from the row position and the title, so
                # identical titles in different rows get different codes.
                m = md5()
                m.update(str(j).encode('utf-8'))
                m.update(songTitle.encode('utf-8'))
                code = str(int(m.hexdigest(), 16) % int(1e5))

                amdc = artistDBMediaDataClass(album=songTitle,
                                              url=songURL,
                                              aclass=None,
                                              aformat=None,
                                              artist=songArtists,
                                              code=code,
                                              year=None)
                media.setdefault(mediaType, []).append(amdc)

        return media
Пример #9
0
    def getProfile(self):
        """Build the artist profile from the parsed page in ``self.bsdata``.

        Sections read from the page:
          * ``<meta name="title">``: its ``content`` attribute is stored as
            ``extra["Search"]``.
          * ``ul.tabs``: its links are stored as ``extra["Tabs"]``, keyed by
            the link title, else the first word of the link text, else
            ``"Tab <index>"``.
          * ``section.basic-info``: each ``<div>`` with exactly one CSS class
            is read; "genre" becomes the genres, "styles" becomes the tags
            and any other class is stored in the general data.

        Returns:
            An ``artistDBProfileClass`` built with ``general``, ``tags``,
            ``genres`` and ``extra``; each is ``None`` when nothing was found.
        """
        generalData = None
        genreData = None
        tagsData = None
        extraData = None

        content = self.bsdata.find("meta", {"name": "title"})
        contentAttr = content.attrs if content is not None else None
        searchTerm = contentAttr.get(
            "content") if contentAttr is not None else None
        searchData = [artistDBTextClass(searchTerm)
                      ] if searchTerm is not None else None

        tabsul = self.bsdata.find("ul", {"class": "tabs"})
        refs = tabsul.findAll("a") if tabsul is not None else None
        tabLinks = [artistDBLinkClass(ref)
                    for ref in refs] if refs is not None else None

        tabKeys = None
        if isinstance(tabLinks, list):
            tabKeys = []
            for i, tabLink in enumerate(tabLinks):
                keyTitle = tabLink.title
                if keyTitle is not None:
                    tabKeys.append(keyTitle)
                    continue
                keyText = tabLink.text
                words = keyText.replace(
                    "\n", "").split() if keyText is not None else []
                # Empty or whitespace-only text has no first word; fall back
                # to the positional name instead of failing.
                tabKeys.append(words[0] if words else "Tab {0}".format(i))

        # A single empty key (e.g. an empty title) discards the whole mapping.
        tabsData = dict(zip(tabKeys, tabLinks)) if (isinstance(tabKeys, list)
                                                    and all(tabKeys)) else None

        if searchData is not None:
            if extraData is None:
                extraData = {}
            extraData["Search"] = searchData
        if tabsData is not None:
            if extraData is None:
                extraData = {}
            extraData["Tabs"] = tabsData

        basicInfo = self.bsdata.find("section", {"class": "basic-info"})
        if basicInfo is not None:
            for div in basicInfo.findAll("div"):
                attrs = div.attrs.get('class')
                if not (isinstance(attrs, list) and len(attrs) == 1):
                    continue
                attrKey = attrs[0]
                refs = div.findAll("a")
                # Links when present, otherwise the div's own text.
                val = [artistDBTextClass(div)] if len(refs) == 0 else [
                    artistDBLinkClass(ref) for ref in refs
                ]
                if attrKey == "genre":
                    genreData = val
                elif attrKey == "styles":
                    tagsData = val
                else:
                    if generalData is None:
                        generalData = {}
                    generalData[attrKey] = val

        apc = artistDBProfileClass(general=generalData,
                                   tags=tagsData,
                                   genres=genreData,
                                   extra=extraData)
        return apc