def scrape():
    """Scrape DarkLyrics: fetch the full artist list, then crawl
    albums -> songs -> lyrics via ScrapeJam.

    Results are written to 'darklyrics.json'; failures are logged to
    'darklyrics_errs.json'.
    """
    print("Fetching DarkLyrics artist list...")
    artists = getArtists()
    print("...Done.\n")
    # ScrapeJam drives the traversal, calling getAlbums/getSongs/getLyrics
    # for each artist and persisting output as it goes.
    sj = ScrapeJam('darklyrics.json', 'darklyrics_errs.json')
    sj.scrape(artists, getAlbums, getSongs, getLyrics)
def scrape():
    """Scrape LyricWiki page by page, starting from WIKI_URL.

    Each listing page's artist links are handed to ScrapeJam, which writes
    results to a timestamped JSON file and errors to 'lyricwiki_errs.log'.
    Pagination follows the 'pagefrom' links until the listing runs out.
    """
    next_page = WIKI_URL
    # Timestamp the output file so successive runs do not clobber each other.
    # NOTE(review): the ':' characters make this filename invalid on Windows.
    d = datetime.datetime.now()
    timestamp = '{:%Y-%m-%d_%H:%M:%S}'.format(d)
    filename = 'lyricwiki_' + timestamp + '.json'
    while next_page:
        soup = BeautifulSoup(getHtml(next_page))
        # All anchors inside the MediaWiki category listing container.
        artists_a = soup.select('div#mw-pages')[0]('a')
        artists = [(artist.string, artist['href']) for artist in artists_a]
        sj = ScrapeJam(filename, 'lyricwiki_errs.log')
        sj.scrape(artists, getAlbums, getSongs, getLyrics)
        # Presumably the first anchor in div#mw-pages is the "next page"
        # pagination link — TODO confirm; artist anchors live in the same
        # container, so this may pick an artist href on some pages.
        next_page = BASE_URL + soup.select('div#mw-pages')[0]('a')[0]['href']
        # No 'pagefrom' query parameter means there is no further page.
        if 'pagefrom' not in next_page:
            break
def scrape():
    """Crawl lyrics for the single artist Nightwish via ScrapeJam.

    Output goes to 'nightwish.json'; errors to 'nightwish_errs.json'.
    """
    targets = [('Nightwish', NIGHTWISH_URL)]
    job = ScrapeJam('nightwish.json', 'nightwish_errs.json')
    job.scrape(targets, getAlbums, getSongs, getLyrics)
def scrape():
    """Scrape uta-net: fetch the artist list and hand it to ScrapeJam.

    Output goes to 'utanet.json'; errors to 'utanet.error.json'.
    """
    all_artists = getArtists()
    job = ScrapeJam('utanet.json', 'utanet.error.json')
    job.scrape(all_artists, getAlbums, getSongs, getLyrics)
def scrape():
    """Run a ScrapeJam crawl over the lone Nightwish artist entry.

    Results are written to 'nightwish.json'; failures are recorded in
    'nightwish_errs.json'.
    """
    job = ScrapeJam('nightwish.json', 'nightwish_errs.json')
    artist_list = [('Nightwish', NIGHTWISH_URL)]
    job.scrape(artist_list, getAlbums, getSongs, getLyrics)