def getAlsoKnownAs(self, tag):
    """Collect the alternate names found in an "also known as" entry.

    Anchors become ``artistDBLinkClass`` objects; comma-separated plain
    text becomes ``artistDBTextClass`` objects.

    Args:
        tag: Wrapper whose ``getTag()`` returns the parsed markup of the
            entry, or ``None``.

    Returns:
        ``None`` when ``tag`` is ``None``; otherwise a list (possibly
        empty). When a ``rendered_text`` span exists the list holds the
        linked names first, followed by the plain-text names.
    """
    if tag is None:
        return None

    # Example markup handled here:
    #   <div class="info_content"><span class="rendered_text">
    #     Dwight David Turner [birth name],
    #     <a class="artist" href="/artist/dwight_david"
    #        title="[Artist864564]">Dwight David</a>, Spider Turner
    #   </span></div>
    node = tag.getTag()
    span = node.find("span", {"class": "rendered_text"})
    retval = []

    if span is not None:
        for ref in span.findAll("a"):
            retval.append(artistDBLinkClass(ref))
            # Drop the anchor so its text is not picked up again below.
            span = removeTag(span, ref)
        for fragment in span.text.split(","):
            name = fragment.strip()
            # Removing an anchor leaves an empty slot between commas;
            # skip those instead of emitting blank text entries.
            if name:
                retval.append(artistDBTextClass(name))
        return retval

    refs = node.findAll("a")
    if len(refs) == 0:
        # Best effort: only works when getTag() yields something with a
        # callable ``strip``. Failures are ignored on purpose, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        # NOTE(review): if getTag() returns a parsed tag, ``.text.strip()``
        # was probably intended here -- confirm.
        try:
            retval.append(artistDBTextClass(node.strip()))
        except Exception:
            pass
    else:
        retval.extend(artistDBLinkClass(ref) for ref in refs)
    return retval
def getProfile(self):
    """Build the profile from the "profile" div and the credit facets.

    General data pairs every ``div.head`` with the ``div.content`` at the
    same position; the key is the heading text without its last character
    and the value is a list of links (or a single text object when the
    content has no anchors). Extra data is a nested mapping
    ``data-credit-type -> data-credit-subtype -> link`` built from the
    anchors inside every ``ul.facets_nav``.

    Returns:
        ``artistDBProfileClass(general=..., extra=...)`` where ``general``
        is a dict (possibly empty) and ``extra`` is the nested dict or
        ``None`` when no facet anchor carries both attributes.
    """
    generalData = {}
    result = self.bsdata.find("div", {"class": "profile"})
    # find() yields None when the div is absent; treat that as "no
    # general data" rather than failing on result.findAll.
    if result is not None:
        heads = result.findAll("div", {"class": "head"})
        content = result.findAll("div", {"class": "content"})
        # Headings and contents are matched by position, so a count
        # mismatch means the pairing cannot be trusted: skip it.
        if len(heads) == len(content):
            for head, body in zip(heads, content):
                key = head.text[:-1] if isinstance(head.text, str) else None
                refs = body.findAll("a")
                if len(refs) == 0:
                    generalData[key] = [artistDBTextClass(body)]
                else:
                    generalData[key] = [artistDBLinkClass(ref) for ref in refs]

    extraData = {}
    for ul in self.bsdata.findAll("ul", {"class": "facets_nav"}):
        for ref in ul.findAll("a"):
            dctype = ref.attrs.get('data-credit-type')
            dcsubtype = ref.attrs.get('data-credit-subtype')
            # Both attributes are needed to place the link.
            if not all([dctype, dcsubtype]):
                continue
            if extraData.get(dctype) is None:
                extraData[dctype] = {}
            extraData[dctype][dcsubtype] = artistDBLinkClass(ref)

    extraData = extraData if len(extraData) > 0 else None
    apc = artistDBProfileClass(general=generalData, extra=extraData)
    return apc
def getProfile(self):
    """Assemble the profile from the info box and the related-artists box.

    Each ``div.detailRow`` inside ``div.artistTopBox info`` contributes its
    anchors under the row's ``span`` label (a leading "/" is stripped from
    the label). The "Genres" row feeds ``genres``; every other row feeds
    ``general``. Rows without a span or without anchors are ignored. The
    first anchor of each ``div.artistBlock`` under ``div.relatedArtists``
    feeds ``extra``.

    Returns:
        ``artistDBProfileClass`` built with ``general`` (dict or ``None``
        when empty), ``genres`` (list or ``None``), ``tags`` (always
        ``None`` here) and ``extra`` (list, or ``None`` when the
        related-artists box is absent).
    """
    general = {}
    genres = None
    tags = None
    related = None

    infoBox = self.bsdata.find("div", {"class": "artistTopBox info"})
    if infoBox is None:
        rows = []
    else:
        rows = infoBox.findAll("div", {"class": "detailRow"})

    for detail in rows:
        label = detail.find("span")
        if label is None:
            continue

        name = None
        if label.text is not None:
            name = label.text.strip()
        if isinstance(name, str) and name.startswith("/"):
            name = name[1:].strip()

        anchors = detail.findAll("a")
        if len(anchors) == 0:
            continue

        if isinstance(anchors, list) and len(anchors) > 0:
            links = [artistDBLinkClass(anchor) for anchor in anchors]
        else:
            links = None

        if name == "Genres":
            genres = links
        else:
            general[name] = links

    relatedBox = self.bsdata.find("div", {"class": "relatedArtists"})
    if relatedBox is not None:
        blocks = relatedBox.findAll("div", {"class": "artistBlock"})
        if blocks is not None:
            firstAnchors = [block.find("a") for block in blocks]
            # Blocks without an anchor are dropped.
            related = [
                artistDBLinkClass(anchor)
                for anchor in firstAnchors
                if anchor is not None
            ]

    if len(general) == 0:
        general = None

    return artistDBProfileClass(general=general,
                                genres=genres,
                                tags=tags,
                                extra=related)
def getMedia(self, artist):
    """Collect one media record per ``div.albumBlock`` in the page.

    Every child ``div`` of an album block is stored under its first CSS
    class, as a link object when it contains an anchor and as a text
    object otherwise. The "image" entry supplies the URL, while
    "albumTitle", "date" and "type" supply title, year and media type.

    Args:
        artist: Not used by this method.

    Returns:
        The ``artistDBMediaClass`` instance whose ``media`` mapping groups
        the created ``artistDBMediaDataClass`` records by media type.
    """

    def textOrNone(entry):
        # Only text objects carry a usable ``text`` for these fields.
        if isinstance(entry, artistDBTextClass):
            return entry.text
        return None

    amc = artistDBMediaClass()

    for albumBlock in self.bsdata.findAll("div", {"class": "albumBlock"}):
        fields = {}
        for div in albumBlock.findAll("div"):
            classes = div.attrs.get("class")
            if isinstance(classes, list):
                fieldName = classes[0]
            else:
                fieldName = None
            anchor = div.find("a")
            if anchor is not None:
                fields[fieldName] = artistDBLinkClass(anchor)
            else:
                fields[fieldName] = artistDBTextClass(div)

        image = fields.get("image")
        url = image.href if isinstance(image, artistDBLinkClass) else None
        title = textOrNone(fields.get("albumTitle"))
        year = textOrNone(fields.get("date"))
        mediaType = textOrNone(fields.get("type"))

        code = self.dutils.getAlbumCode(name=title, url=url)
        # NOTE(review): the artist is hard-coded to "U2" and the ``artist``
        # argument is ignored -- looks like leftover test data; confirm
        # what value callers expect here.
        amdc = artistDBMediaDataClass(album=title,
                                      url=url,
                                      aclass=None,
                                      aformat=None,
                                      artist="U2",
                                      code=code,
                                      year=year)

        if amc.media.get(mediaType) is None:
            amc.media[mediaType] = []
        amc.media[mediaType].append(amdc)

        if self.debug:
            print("\t\tAdding Media ({0} -- {1})".format(title, url))

    return amc
def getProfile(self):
    """Build the profile from the ``div.artist_info`` header/content pairs.

    ``div.info_hdr`` texts are paired by position with ``div.info_content``
    tags. Each known header is converted to a list of links, or to a
    single text object when the content holds no anchors:

    * general: "Formed", "Disbanded", "Members", "Also Known As", "Born",
      "Currently", "Notes"
    * extra: "Member of", "Related Artists"
    * genres: "Genres"

    The anchors found in ``div.list_name`` under ``ul.lists`` are returned
    as ``external["Lists"]``.

    Returns:
        ``artistDBProfileClass(err="NoInfo")`` when the info div is
        missing; otherwise an ``artistDBProfileClass`` with ``general``,
        ``genres``, ``extra`` and ``external``, each ``None`` when nothing
        was found for it.
    """
    profile = self.bsdata.find("div", {"class": "artist_info"})
    if profile is None:
        return artistDBProfileClass(err="NoInfo")

    def linksOrText(tag):
        # Anchors win; a tag without anchors is kept as one text object.
        refs = tag.findAll("a")
        if len(refs) == 0:
            return [artistDBTextClass(tag)]
        return [artistDBLinkClass(ref) for ref in refs]

    headers = [
        header.text
        for header in profile.findAll("div", {"class": "info_hdr"})
    ]
    content = profile.findAll("div", {"class": "info_content"})
    profileData = dict(zip(headers, content))

    ##
    ## User lists
    ##
    userLists = []
    rymList = self.bsdata.find("ul", {"class": "lists"})
    listInfos = rymList.findAll(
        "div", {"class": "list_info"}) if rymList is not None else []
    for userList in listInfos:
        listName = userList.find("div", {"class": "list_name"})
        # A list entry without a name div or without an anchor is skipped.
        listRef = listName.find("a") if listName is not None else None
        if listRef is not None:
            userLists.append(artistDBLinkClass(listRef))
    externalData = {"Lists": userLists} if len(userLists) > 0 else None

    ##
    ## General
    ##
    generalData = {}
    generalKeys = ("Formed", "Disbanded", "Members", "Also Known As",
                   "Born", "Currently", "Notes")
    for key in generalKeys:
        tag = profileData.get(key)
        if tag is not None:
            generalData[key] = linksOrText(tag)

    ##
    ## Extra
    ##
    extraData = None
    for key in ("Member of", "Related Artists"):
        tag = profileData.get(key)
        if tag is not None:
            # Create the dict once so the second key does not wipe out
            # the first one.
            if extraData is None:
                extraData = {}
            extraData[key] = linksOrText(tag)

    ##
    ## Genres
    ##
    genreTag = profileData.get("Genres")
    genreData = linksOrText(genreTag) if genreTag is not None else None

    generalData = generalData if len(generalData) > 0 else None
    apc = artistDBProfileClass(general=generalData,
                               genres=genreData,
                               extra=extraData,
                               external=externalData)
    return apc
def getProfile(self):
    """Build the profile from properties, genres, tags, links and tabs.

    * general: ``dd`` children of ``dl.properties``, keyed by CSS class
    * genres: anchors in ``div.genre-list`` (``None`` if the div is absent)
    * tags: anchors in ``div#sidebar-tag-list`` (``None`` if absent)
    * external: ``li`` children of ``ul.external_links``, keyed by CSS class
    * extra: anchors of ``div.tabs`` keyed by their text, or ``None`` when
      the div is absent or any tab text is empty

    Only ``dd``/``li`` elements carrying exactly one CSS class are kept;
    each maps to its links, or to one text object when it has no anchors.

    Returns:
        The populated ``artistDBProfileClass``.
    """

    def collectByClass(container, childName):
        # Map "single CSS class" -> links (or text) for each child element.
        collected = {}
        if container is None:
            return collected
        for child in container.findAll(childName):
            classes = child.attrs.get('class')
            if not isinstance(classes, list) or len(classes) != 1:
                continue
            anchors = child.findAll('a')
            if len(anchors) == 0:
                collected[classes[0]] = [artistDBTextClass(child)]
            else:
                collected[classes[0]] = [
                    artistDBLinkClass(anchor) for anchor in anchors
                ]
        return collected

    ##
    ## Artist information
    ##
    propertyList = self.bsdata.find("dl", {"class": "properties"})
    artistInformation = collectByClass(propertyList, "dd")

    ##
    ## Genres
    ##
    genreData = None
    genreBox = self.bsdata.find("div", {"class": "genre-list"})
    if genreBox is not None:
        genreData = [artistDBLinkClass(a) for a in genreBox.findAll("a")]

    ##
    ## Tags
    ##
    tagData = None
    tagBox = self.bsdata.find("div", {"id": "sidebar-tag-list"})
    if tagBox is not None:
        tagData = [artistDBLinkClass(a) for a in tagBox.findAll("a")]

    ##
    ## External Links
    ##
    linkList = self.bsdata.find("ul", {"class": "external_links"})
    externalLinks = collectByClass(linkList, "li")

    ##
    ## Extra
    ##
    extraData = None
    tabBox = self.bsdata.find("div", {"class": "tabs"})
    tabAnchors = None if tabBox is None else tabBox.findAll("a")
    if tabAnchors is not None:
        tabLinks = [artistDBLinkClass(a) for a in tabAnchors]
        tabNames = [link.text for link in tabLinks]
        # A single empty tab name discards the whole mapping.
        if all(tabNames):
            extraData = dict(zip(tabNames, tabLinks))

    return artistDBProfileClass(general=artistInformation,
                                tags=tagData,
                                genres=genreData,
                                extra=extraData,
                                external=externalLinks)
def getProfile(self):
    """Build the profile from metadata, wiki text, similar artists, tags
    and external links.

    * general["Metadata"]: ``dt`` text -> links (or one text object) of the
      ``dd`` at the same position, merged over every ``dl`` inside
      ``div.metadata-and-wiki-row``
    * general["Wiki"]: ``{"Text": [...], "Refs": [...]}`` collected from
      every ``div.wiki-block``; anchors are removed from the block before
      its text is taken, and refs are de-duplicated on ``(href, text)``
    * extra: ``a.link-block-target`` of each ``li`` in the similar-artists
      list (``ol`` variant first, sidebar ``section`` as fallback)
    * tags: anchors of ``section.catalogue-tags``
    * external: anchors of ``section.external-links-section``

    Returns:
        ``artistDBProfileClass`` with ``general``, ``tags``, ``extra`` and
        ``external``; each is ``None`` when nothing was found for it.
    """

    def linksOrNone(refs):
        # A missing section or an empty result both collapse to None.
        if isinstance(refs, list) and len(refs) > 0:
            return [artistDBLinkClass(ref) for ref in refs]
        return None

    generalData = {}

    ##
    ## General
    ##
    metadata = self.bsdata.find("div", {"class": "metadata-and-wiki-row"})
    if metadata is not None:
        for dl in metadata.findAll("dl"):
            attrKeys = [dt.text for dt in dl.findAll("dt")]
            attrVals = []
            for dd in dl.findAll("dd"):
                refs = dd.findAll("a")
                attrVals.append(
                    [artistDBTextClass(dd)] if len(refs) == 0 else
                    [artistDBLinkClass(ref) for ref in refs])
            # Merge instead of assigning, so an earlier <dl> is not
            # discarded when the row contains several of them.
            generalData.setdefault("Metadata", {}).update(
                zip(attrKeys, attrVals))

    for wikicolumn in self.bsdata.findAll("div", {"class": "wiki-column"}):
        for wikiblock in wikicolumn.findAll("div", {"class": "wiki-block"}):
            refs = wikiblock.findAll("a")
            # Build the links before the anchors are stripped from the block.
            links = [artistDBLinkClass(ref) for ref in refs]
            for ref in refs:
                removeTag(wikiblock, ref)
            text = artistDBTextClass(wikiblock)
            if generalData.get("Wiki") is None:
                generalData["Wiki"] = {"Text": [], "Refs": []}
            generalData["Wiki"]["Text"].append(text)
            # Add each block's links once (not once per link).
            generalData["Wiki"]["Refs"].extend(links)

    if generalData.get("Wiki") is not None:
        keep = {(ref.href, ref.text): ref
                for ref in generalData["Wiki"]["Refs"]}
        generalData["Wiki"]["Refs"] = list(keep.values())

    ##
    ## Similar artists
    ##
    similarData = self.bsdata.find(
        "ol", {"class": "catalogue-overview-similar-artists-full-width"})
    if similarData is None:
        similarData = self.bsdata.find(
            "section", {"class": "artist-similar-sidebar"})
    lis = similarData.findAll("li") if similarData is not None else None
    refs = [li.find("a", {"class": "link-block-target"})
            for li in lis] if lis is not None else None
    extraData = linksOrNone(refs)

    ##
    ## Tags
    ##
    tags = self.bsdata.find("section", {"class": "catalogue-tags"})
    tagsData = linksOrNone(tags.findAll("a") if tags is not None else None)

    ##
    ## External
    ##
    external = self.bsdata.find("section",
                                {"class": "external-links-section"})
    externalData = linksOrNone(
        external.findAll("a") if external is not None else None)

    generalData = generalData if len(generalData) > 0 else None
    apc = artistDBProfileClass(general=generalData,
                               tags=tagsData,
                               extra=extraData,
                               external=externalData)
    return apc
def getMediaSongs(self):
    """Collect song records from every table that has a header row.

    A table is used only when its first ``tr`` contains at least one
    ``th``. Each following row contributes a record when its
    ``td.title-composer`` cell holds a ``div.title``; the artists come
    from the anchors (or, without anchors, the text) of ``div.composer``.

    Returns:
        ``{}`` when no song was found, otherwise ``{"Songs": [...]}`` with
        one ``artistDBMediaDataClass`` per song. Each record's ``code`` is
        the decimal string of an md5 over the row index and title, reduced
        modulo 100000.
    """
    mediaType = "Songs"
    media = {}

    for table in self.bsdata.findAll("table"):
        trs = table.findAll("tr")
        # A table without any row has no header to inspect; skip it
        # instead of failing on trs[0].
        if len(trs) == 0:
            continue
        headers = [th.text.strip() for th in trs[0].findAll("th")]
        if len(headers) == 0:
            continue

        # j is the row position below the header and restarts per table.
        for j, tr in enumerate(trs[1:]):
            tdTitle = tr.find("td", {"class": "title-composer"})
            if tdTitle is None:
                continue
            divTitle = tdTitle.find("div", {"class": "title"})
            if divTitle is None or divTitle.text is None:
                continue
            compTitle = tdTitle.find("div", {"class": "composer"})

            songTitle = divTitle.text.strip()
            songURL = divTitle.find('a')
            songURL = artistDBLinkClass(
                songURL) if songURL is not None else None

            songArtists = None
            if compTitle is not None:
                artistRefs = compTitle.findAll("a")
                if artistRefs is not None:
                    if len(artistRefs) == 0:
                        songArtists = [artistDBTextClass(compTitle.text)]
                    else:
                        songArtists = [
                            artistDBLinkClass(ref) for ref in artistRefs
                        ]

            m = md5()
            m.update(str(j).encode('utf-8'))
            m.update(songTitle.encode('utf-8'))
            code = str(int(m.hexdigest(), 16) % int(1e5))

            amdc = artistDBMediaDataClass(album=songTitle,
                                          url=songURL,
                                          aclass=None,
                                          aformat=None,
                                          artist=songArtists,
                                          code=code,
                                          year=None)
            if media.get(mediaType) is None:
                media[mediaType] = []
            media[mediaType].append(amdc)

    return media
def getProfile(self):
    """Build the profile from the title meta tag, the tab bar and the
    basic-info section.

    * extra["Search"]: the ``content`` attribute of ``meta[name=title]``
      wrapped as one text object
    * extra["Tabs"]: anchors of ``ul.tabs`` keyed by link title, else by
      the first word of the link text, else by ``"Tab <index>"``; omitted
      when any key is empty
    * genres / tags / general: ``div`` children of ``section.basic-info``
      that carry exactly one CSS class -- "genre" feeds genres, "styles"
      feeds tags, anything else is stored in general under that class

    Returns:
        ``artistDBProfileClass`` with ``general``, ``tags``, ``genres`` and
        ``extra``; each is ``None`` when nothing was found for it.
    """

    def linksOrText(node):
        # Anchors win; a node without anchors is kept as one text object.
        anchors = node.findAll("a")
        if len(anchors) == 0:
            return [artistDBTextClass(node)]
        return [artistDBLinkClass(anchor) for anchor in anchors]

    generalData = None
    genreData = None
    tagsData = None
    extraData = None

    ##
    ## Search term
    ##
    content = self.bsdata.find("meta", {"name": "title"})
    contentAttr = content.attrs if content is not None else None
    searchTerm = contentAttr.get(
        "content") if contentAttr is not None else None
    if searchTerm is not None:
        extraData = {"Search": [artistDBTextClass(searchTerm)]}

    ##
    ## Tabs
    ##
    tabsul = self.bsdata.find("ul", {"class": "tabs"})
    refs = tabsul.findAll("a") if tabsul is not None else None
    if refs is not None:
        tabLinks = [artistDBLinkClass(ref) for ref in refs]
        tabKeys = []
        for i, tabLink in enumerate(tabLinks):
            keyTitle = tabLink.title
            keyText = tabLink.text
            if keyTitle is not None:
                tabKeys.append(keyTitle)
                continue
            if keyText is not None:
                words = keyText.replace("\n", "").split()
                # Empty or whitespace-only text has no first word; fall
                # through to the positional key instead of raising.
                if words:
                    tabKeys.append(words[0])
                    continue
            tabKeys.append("Tab {0}".format(i))
        # A single empty key discards the whole tab mapping.
        if all(tabKeys):
            if extraData is None:
                extraData = {}
            extraData["Tabs"] = dict(zip(tabKeys, tabLinks))

    ##
    ## Basic info
    ##
    basicInfo = self.bsdata.find("section", {"class": "basic-info"})
    if basicInfo is not None:
        for div in basicInfo.findAll("div"):
            attrs = div.attrs.get('class')
            if not (isinstance(attrs, list) and len(attrs) == 1):
                continue
            attrKey = attrs[0]
            if attrKey == "genre":
                genreData = linksOrText(div)
            elif attrKey == "styles":
                tagsData = linksOrText(div)
            else:
                if generalData is None:
                    generalData = {}
                generalData[attrKey] = linksOrText(div)

    apc = artistDBProfileClass(general=generalData,
                               tags=tagsData,
                               genres=genreData,
                               extra=extraData)
    return apc