Пример #1
0
def KCRW_harvest(maxbands):
    """Collect recently played tracks from the KCRW tracklist API.

    Pages 1-29 of the ``Simulcast/all`` endpoint are fetched in order until at
    least ``maxbands`` entries have been gathered. Entries whose artist is the
    ``[BREAK]`` placeholder are skipped. Every kept entry becomes a ``band``
    whose ``appeared`` field records whether its program title was
    "Morning Becomes Eclectic".

    Args:
        maxbands: Maximum number of bands to return.

    Returns:
        A list of at most ``maxbands`` ``band`` objects, de-duplicated with
        ``band`` equality, in first-seen order.
    """
    allbands = []
    page_number = 1
    print('Grabbing KCRW bands')
    while page_number < 30 and len(allbands) < maxbands:
        url = 'https://tracklist-api.kcrw.com/Simulcast/all/' + str(page_number)
        # Use the response as a context manager so the connection is closed
        # as soon as the payload has been read.
        with urllib.request.urlopen(url) as response:
            data = json.loads(response.read())
        print(("KCRW page {0} \n".format(page_number)))
        for entry in data:
            bandname = entry["artist"]
            trackname = entry['title']
            if bandname == "[BREAK]":
                continue
            if entry["program_title"] == "Morning Becomes Eclectic":
                appeared = 'KCRW Eclectic'
            else:
                appeared = 'not KCRW Eclectic'
            allbands.append(
                band(name=bandname, song=trackname, appeared=appeared))
        page_number += 1

    # ``in`` relies on band equality; a list is used because nothing visible
    # here guarantees that band objects are hashable.
    unique = []
    for candidate in allbands:
        if candidate not in unique:
            unique.append(candidate)

    return unique[:maxbands]
Пример #2
0
def add_to_db(Session, k):
    """Insert scraped tracks into the database, skipping rows already stored.

    A row counts as already stored when a ``band`` with the same name, song
    and source exists.

    Args:
        Session: Callable returning the session to use (called once).
        k: Iterable of indexable rows laid out as
            ``(name, song, album, release_year, source, dateplayed)``.
            Index 4 is used for both ``source`` and ``appeared``.

    Returns:
        The number of rows that were newly added.
    """
    session = Session()
    today = dt.date.today()
    adds = 0
    for line in k:
        n_ = band(name=line[0],
                  song=line[1],
                  album=line[2],
                  release_year=line[3],
                  source=line[4],
                  appeared=line[4],
                  dateplayed=line[5],
                  dateadded=today,
                  cleanname=cleanup(line[0]))
        q = session.query(band).filter(band.name == n_.name,
                                       band.song == n_.song,
                                       band.source == n_.source)
        # Run the lookup once and test identity: ``== None`` would be routed
        # through any ``__eq__`` the band class defines.
        existing = q.first()
        if existing is None:
            session.add(n_)
            adds += 1
        else:
            try:
                print('Already had {0} - {1}'.format(n_.name, n_.song))
            except Exception:
                # Printing the names failed (presumably an encoding problem
                # on the console); fall back to the numeric row id.
                print('Already had it. Cannot print. ID is {0}'.format(
                    existing.id))
        # Committed once per input row, exactly as before.
        session.commit()

    return adds
Пример #3
0
def pfork_tracks(maxbands):
    """Scrape artist/track pairs from Pitchfork's track review listing.

    Pages 1-50 of ``https://pitchfork.com/reviews/tracks/`` are requested in
    order until at least ``maxbands`` entries have been collected. A page that
    raises any exception is reported on stdout and skipped; entries parsed
    from that page before the failure are kept.

    Args:
        maxbands: Maximum number of bands to return.

    Returns:
        A list of at most ``maxbands`` ``band`` objects with ``appeared`` set
        to ``'Pitchfork Top Tracks'``, de-duplicated with ``band`` equality in
        first-seen order.
    """
    hdr = {'User-Agent': 'Mozilla/5.0'}
    allbands = []
    for page_number in range(1, 51):
        if len(allbands) >= maxbands:
            break
        try:
            print(('Pitchfork page: {0}'.format(page_number)))
            site = 'https://pitchfork.com/reviews/tracks/?page=' + str(page_number)
            req = urllib.request.Request(site, headers=hdr)
            # Parse inside the ``with`` so the response is closed afterwards
            # instead of being leaked.
            with urllib.request.urlopen(req) as page:
                soup = BeautifulSoup(page, "html.parser")
            details = soup.findAll(
                "div", {"class": "track-collection-item__details"})
            print(("Page {0} retrieved".format(page_number)))
            for banddiv in details:
                # Curly quotes around names/titles are stripped.
                artist = banddiv.find('ul', {'class': 'artist-list'}).li.text \
                    .strip().replace('”', '').replace('“', '')
                track = banddiv.find(
                    'h2', {'class': 'track-collection-item__title'}).text \
                    .strip().replace('”', '').replace('“', '')
                print(artist, track)
                allbands.append(band(name=artist,
                                     appeared='Pitchfork Top Tracks',
                                     song=track))
        except Exception as e:
            # Best-effort scraping: report the failure and try the next page.
            print(str(e))
            print(("Page {0} failed".format(page_number)))

    unique = []
    for candidate in allbands:
        if candidate not in unique:
            unique.append(candidate)

    return unique[:maxbands]
Пример #4
0
def get_jukebox_bangers():
    """Scrape the Singles Jukebox link list and build one band per linked post.

    The index page is fetched, every link inside the ``linkcat-215`` list item
    is followed, and the first ``h2`` of each post is split on an en dash into
    artist and song.

    Returns:
        A list of ``band`` objects with ``appeared`` set to
        ``'Singles Jukebox 2018 Bangers'``. Posts without a heading, or whose
        heading has no en dash, are reported on stdout and skipped. An empty
        list is returned when the link list itself is missing.
    """
    index_url = 'http://www.thesinglesjukebox.com/?p=25856'
    hdr = {'User-Agent': 'Mozilla/5.0'}

    def fetch_soup(address):
        # Fetch and parse one page, closing the HTTP response afterwards.
        req = urllib.request.Request(address, headers=hdr)
        with urllib.request.urlopen(req) as page:
            return BeautifulSoup(page, "html.parser")

    link_list = fetch_soup(index_url).find('li', {'id': 'linkcat-215'})
    if link_list is None:
        print('Could not find link list linkcat-215')
        return []
    print(link_list.text)

    links = []
    for anchor in link_list.findAll('a'):
        print(anchor.text)
        print(anchor['href'])
        links.append(anchor['href'])

    adds = []
    for link in links:
        post = fetch_soup(link).find('div', {'class': 'post'})
        heading = post.find('h2') if post is not None else None
        if heading is None:
            print('No post heading found at {0}'.format(link))
            continue
        # Headings are expected to look like "Artist – Song" (en dash).
        parts = heading.text.split('–')
        if len(parts) < 2:
            print('Could not split artist and song: {0}'.format(heading.text))
            continue
        artist = parts[0].strip()
        song = parts[1].strip()
        print(artist, song)
        adds.append(band(name=artist,
                         appeared='Singles Jukebox 2018 Bangers',
                         song=song))

    return adds
Пример #5
0
def Pitchfork_charts(maxbands):
    """Scrape artist/album pairs from Pitchfork's best-albums review listing.

    Pages 1-20 of ``http://pitchfork.com/reviews/best/albums/`` are requested
    in order until at least ``maxbands`` entries have been collected. A page
    that raises any exception is reported on stdout and skipped.

    Args:
        maxbands: Maximum number of bands to return.

    Returns:
        A list of at most ``maxbands`` ``band`` objects with ``appeared`` set
        to ``'Pitchfork 8.0+ reviews'``, de-duplicated with ``band`` equality
        in first-seen order.
    """
    hdr = {'User-Agent': 'Mozilla/5.0'}
    allbands = []
    for page_number in range(1, 21):
        if len(allbands) >= maxbands:
            break
        try:
            print(('Pitchfork page: {0}'.format(page_number)))
            site = 'http://pitchfork.com/reviews/best/albums/?page=' + str(page_number)
            req = urllib.request.Request(site, headers=hdr)
            # Parse inside the ``with`` so the response is closed afterwards
            # instead of being leaked.
            with urllib.request.urlopen(req) as page:
                soup = BeautifulSoup(page, "html.parser")
            artist_lists = soup.findAll("ul", {"class": "artist-list"})
            print(("Page {0} retrieved".format(page_number)))
            for banddiv in artist_lists:
                # The album title is the first <h2> following the artist list.
                album = banddiv.findNext("h2").text
                allbands.append(band(name=banddiv.text,
                                     appeared='Pitchfork 8.0+ reviews',
                                     album=album))
        except Exception as e:
            # Best-effort scraping: report the failure and try the next page.
            print(str(e))
            print(("Page {0} failed".format(page_number)))

    unique = []
    for candidate in allbands:
        if candidate not in unique:
            unique.append(candidate)

    return unique[:maxbands]
Пример #6
0
def metacritic(maxbands):
    """Scrape artist/album pairs from Metacritic's albums-by-score listing.

    The page is rendered in a Chrome WebDriver session and the resulting body
    HTML is parsed with BeautifulSoup.

    Args:
        maxbands: Maximum number of bands to return.

    Returns:
        A list of at most ``maxbands`` ``band`` objects with ``appeared`` set
        to ``'Metacritic'``, de-duplicated with ``band`` equality in
        first-seen order. Empty when the ``product_rows`` container is not
        present in the rendered page.

    Side effects:
        Sets the process-wide default socket timeout to 15 seconds.
    """
    socket.setdefaulttimeout(15)
    url = 'http://www.metacritic.com/browse/albums/score/metascore/year/filtered'

    chromeOptions = webdriver.ChromeOptions()
    # Chrome content-setting preference; value 2 is commonly used to block
    # image loading -- NOTE(review): confirm against the Chrome version in use.
    prefs = {'profile.managed_default_content_settings.images': 2}
    chromeOptions.add_experimental_option("prefs", prefs)
    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    driver = webdriver.Chrome(options=chromeOptions)
    try:
        driver.get(url)
        innerHTML = driver.execute_script("return document.body.innerHTML")
    finally:
        # Always shut the browser down, even when loading the page fails.
        driver.quit()

    bs = BeautifulSoup(innerHTML, 'html.parser')
    container = bs.find('div', {'class': 'product_rows'})
    if container is None:
        print('Metacritic: product_rows container not found')
        return []

    allbands = []
    for row in container.find_all('div', {'class': 'product_row release'}):
        artist = row.find(
            'div', {'class': 'product_item product_artist'}).text.strip()
        album = row.find(
            'div', {'class': 'product_item product_title'}).text.strip()
        allbands.append(band(name=artist, appeared='Metacritic', album=album))

    unique = []
    for candidate in allbands:
        if candidate not in unique:
            unique.append(candidate)

    return unique[:maxbands]
Пример #7
0
def _parse_chart_line(line):
    """Split one chart line into ``(artist, album)``; ``None`` if it cannot be split."""
    words = line.split()
    if not words:
        return None
    # A first token starting with a digit is dropped (presumably the chart
    # position).
    if words[0][0].isdigit():
        words = words[1:]
    text = ' '.join(words).replace('(self-released)', '')

    parsed = None
    # Hyphen first, then en dash; when both produce exactly two parts the
    # en-dash split wins, matching the original evaluation order.
    for separator in ('-', '–'):
        parts = text.split(separator)
        if len(parts) == 2:
            artist = parts[0].strip()
            # Keep the text before the first "(" if there is one; with no
            # parenthesis the whole title is kept.
            album = parts[1].split('(', 1)[0].strip()
            parsed = (artist, album)
    return parsed


def KEXP_charts(maxbands):
    """Scrape the KEXP charts page into a mix of Top 90 and other chart entries.

    Each ``h4`` heading names a chart; the lines of the paragraph following it
    are parsed as "artist - album" entries. Lines that cannot be split into
    exactly an artist and an album are skipped.

    Args:
        maxbands: Maximum number of bands to return.

    Returns:
        A list of at most ``maxbands`` ``band`` objects: up to
        ``maxbands // 2`` entries whose chart is ``'KEXP Top 90'`` (in page
        order), followed by up to ``maxbands - maxbands // 2`` randomly chosen
        entries from the other charts. ``appeared`` holds the chart heading
        minus its final character.
    """
    allbands = []

    basesite = 'http://kexp.org/charts/'
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = urllib.request.Request(basesite, headers=hdr)
    # Parse inside the ``with`` so the response is closed afterwards.
    with urllib.request.urlopen(req) as page:
        bs = BeautifulSoup(page, "html.parser")

    for heading in bs.findAll('h4'):
        # The heading's last character is dropped (presumably a trailing
        # colon).
        genre = heading.text.strip()[:-1]
        print(genre)
        paragraph = heading.findNext('p')
        if paragraph is None:
            continue
        for line in paragraph.text.splitlines():
            if len(line) <= 1:
                continue
            parsed = _parse_chart_line(line)
            if parsed is None:
                # No usable separator: skip rather than re-using the values
                # parsed from the previous line.
                continue
            artist, album = parsed
            print(artist, album)
            allbands.append(band(name=artist, appeared=genre, album=album))

    # half of this list will be the Top 90
    top90 = []
    others = []
    for entry in allbands:
        bucket = top90 if entry.appeared == 'KEXP Top 90' else others
        if entry not in bucket:
            bucket.append(entry)

    half = maxbands // 2
    top90 = top90[:half]
    shuffle(others)
    # Take from the front of the shuffled list so a short list is not emptied
    # by slicing past its end.
    c = top90 + others[:maxbands - half]

    return c[:maxbands]
Пример #8
0
def sgum(maxbands):
    """Scrape Stereogum's "Album Of The Week" archive for artist/album pairs.

    Archive pages are rendered one at a time in a fresh Chrome WebDriver
    session until at least ``maxbands`` entries have been collected or a page
    contains no article cards.

    Args:
        maxbands: Maximum number of bands to return.

    Returns:
        A list of at most ``maxbands`` ``band`` objects with ``appeared`` set
        to ``'Stereogum'``, de-duplicated with ``band`` equality in first-seen
        order.

    Side effects:
        Sets the process-wide default socket timeout to 10 seconds.
    """
    socket.setdefaulttimeout(10)
    allbands = []
    url1 = 'https://www.stereogum.com/category/franchises/album-of-the-week/'
    marker = 'Album Of The Week:'

    page_number = 1
    while len(allbands) < maxbands:
        print('Getting Stereogum Album of the Week, page {0}'.format(page_number))
        url = url1 + 'page/' + str(page_number) + '/'

        chromeOptions = webdriver.ChromeOptions()
        prefs = {'profile.managed_default_content_settings.images': 2}
        chromeOptions.add_experimental_option("prefs", prefs)
        # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
        driver = webdriver.Chrome(options=chromeOptions)
        try:
            driver.set_page_load_timeout(15)
            driver.get(url)
            innerHTML = driver.execute_script("return document.body.innerHTML")
        finally:
            # Always close the browser, even when the page load fails.
            driver.quit()

        bs = BeautifulSoup(innerHTML, 'html.parser')
        cards = bs.find_all(
            'div',
            class_='article-card article-card-infinite article-card--four-col')
        if not cards:
            # No cards at all: stop instead of requesting pages forever.
            print('No articles found on page {0}; stopping'.format(page_number))
            break

        for card in cards:
            if marker not in card.text:
                continue
            paragraph = card.find('p')
            if paragraph is None:
                continue
            # The album title is the <em> (or, failing that, <i>) element.
            title_tag = paragraph.find('em')
            if title_tag is None:
                title_tag = paragraph.find('i')
            if title_tag is None:
                continue
            album = title_tag.text.strip()

            # Plain string replacement: album titles may contain characters
            # that are special in regular expressions.
            remainder = paragraph.text.replace(marker, '')
            artist = remainder.replace(album, '').strip()
            allbands.append(band(name=artist, appeared='Stereogum', album=album))

        page_number += 1
        print('Found {0} bands so far'.format(len(allbands)))

    unique = []
    for candidate in allbands:
        if candidate not in unique:
            unique.append(candidate)

    return unique[:maxbands]
Пример #9
0
def KNKX(maxbands):
    """Scrape artist/track pairs from the Jazz24 playlist page.

    The playlist is rendered in a Chrome WebDriver session inside the page's
    first iframe. The iframe body is parsed up to 20 times, scrolling the
    ``episodes_container`` element between passes, until at least ``maxbands``
    distinct entries have been collected.

    Args:
        maxbands: Maximum number of bands to return.

    Returns:
        A list of at most ``maxbands`` ``band`` objects with ``appeared`` set
        to ``'KNKX Jazz24'``. Entries equal (by ``band`` equality) to one
        already collected are not added again. Empty when the page has no
        iframe.

    Side effects:
        Sets the process-wide default socket timeout to 15 seconds.
    """
    socket.setdefaulttimeout(15)
    url = 'https://www.jazz24.org/playlist/'

    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')
    options.add_argument('--incognito')
    driver = webdriver.Chrome(options=options)

    allbands = []
    try:
        driver.get(url)
        time.sleep(1)

        # 'tag name' is the locator-strategy string behind By.TAG_NAME; the
        # find_elements_by_* helpers no longer exist in newer Selenium.
        iframes = driver.find_elements('tag name', 'iframe')
        if not iframes:
            print('No iframe found on {0}'.format(url))
            return []
        driver.switch_to.frame(iframes[0])

        counter = 0
        while counter < 20 and len(allbands) < maxbands:
            counter += 1
            html = driver.execute_script("return document.body.innerHTML")
            bs = BeautifulSoup(html, 'html.parser')
            for box in bs.find_all('div', {'class': 'track_info_box'}):
                track_info = box.find('div', {'class': 'track_info'})
                try:
                    track = track_info.find(
                        'div', {'class': 'track_name clearfix'}).text.title()
                    artist = track_info.find(
                        'span', {'class': 'track_field_data'}).text.title()
                except AttributeError:
                    # A lookup returned None: this box lacks the expected
                    # markup, so skip it.
                    continue

                newband = band(name=artist, appeared='KNKX Jazz24',
                               song=track)
                # Every pass re-reads the whole frame, so tracks seen on an
                # earlier pass must not be counted again.
                if newband not in allbands:
                    allbands.append(newband)

            print('scrolling')
            driver.execute_script("document.getElementById('episodes_container').scrollBy(0,-10000);")
            print(f'songs collected: {len(allbands)}')
    finally:
        # Always close the browser, even when scraping fails part-way.
        driver.quit()

    return allbands[:maxbands]
Пример #10
0
def load_to_db(albumlist, source='KEXP Countdown 2019'):
    """Store a list of (artist, album) pairs as album-level ``band`` rows.

    Opens the SQLite database at ``../../databases/scout.db``, creates any
    missing tables, and adds one row per entry unless a row with the same
    artist name, the same album and the same (unset) song already exists.

    Args:
        albumlist: Iterable of indexable rows; index 0 is the artist name and
            index 1 the album title.
        source: Label written to both ``source`` and ``appeared``. Defaults to
            the previously hard-coded ``'KEXP Countdown 2019'``.

    Returns:
        None. Progress and the number of added albums are printed.

    Side effects:
        Sets the process-wide default socket timeout to 15 seconds.
    """
    socket.setdefaulttimeout(15)
    # creation of the SQL database and the "session" object that is used to manage
    # communications with the database
    engine = create_engine('sqlite:///../../databases/scout.db')
    session_factory = sessionmaker(bind=engine)
    Session = scoped_session(session_factory)
    db.metadata.create_all(engine)

    session = Session()

    today = dt.date.today()
    adds = 0

    try:
        for row in albumlist:
            print(row)
            n_ = band(name=row[0],
                      album=row[1],
                      source=source,
                      appeared=source,
                      dateadded=today,
                      cleanname=cleanup(row[0]))
            # Match on the album as well as the artist: without it, a second
            # album by an artist already present would be treated as a
            # duplicate and silently dropped.
            q = session.query(band).filter(band.name == n_.name,
                                           band.album == n_.album,
                                           band.song == n_.song)
            existing = q.first()
            if existing is None:
                session.add(n_)
                adds += 1
            else:
                try:
                    print('Already had {0} - {1}'.format(n_.name, n_.album))
                except Exception:
                    # Printing the names failed (presumably an encoding
                    # problem on the console); fall back to the row id.
                    print('Already had it. Cannot print. ID is {0}'.format(
                        existing.id))
            # Committed once per input row, exactly as before.
            session.commit()
    finally:
        # The scoped session was created here, so release it here too.
        Session.remove()
    print('Added {0} albums'.format(adds))
Пример #11
0
def get_album_tracks(Session):
    """Expand album-level rows into one ``band`` row per album track.

    Selects rows that have a non-empty album, no song and no
    ``got_rest_of_album`` flag, searches Spotify for each artist/album pair,
    and adds a new ``band`` row for every track of the first matching album.

    Args:
        Session: Callable returning the session to use (called once).

    Returns:
        None. New rows are added and committed one album at a time.
    """
    session = Session()
    # ``== None`` is deliberate here: on a SQLAlchemy column it builds an
    # ``IS NULL`` clause. ``.all()`` loads the work list up front so the
    # commits inside the loop do not interleave with result iteration.
    pending = session.query(band).filter(band.album != '').filter(
        band.song == None, band.got_rest_of_album == None).all()
    sp, _username = splog_on()

    for album_row in pending:
        artist = album_row.name
        album = album_row.album
        album_row.found_by_album = True
        print(f'{artist} - {album} - {album_row.got_rest_of_album}')

        query1 = 'artist:{0} album:{1}'.format(artist, album)
        results = sp.search(q=query1, type='track')
        try:
            album_info = results['tracks']['items'][0]['album']
        except (KeyError, IndexError, TypeError):
            # No usable search hit for this album; leave the row unflagged
            # and move on.
            continue

        tracks = sp.album_tracks(album_info['uri'])['items']
        for track in tracks:
            new_track = band(name=artist,
                             song=track['name'],
                             appeared=album_row.appeared,
                             source=album_row.source,
                             album=album_row.album,
                             cleanname=album_row.cleanname,
                             dateadded=album_row.dateadded,
                             spotify_id=track['id'],
                             spotify_release_date=album_info['release_date'],
                             found_by_album=True)
            print(f'{new_track.name}, {new_track.song}')
            session.add(new_track)
        album_row.got_rest_of_album = True
        session.commit()
Пример #12
0
def _numbered_entries(desc):
    """Return the text following each "<number>." marker found in ``desc``."""
    entries = []
    current = ''
    capturing = False
    last_index = len(desc) - 1
    for pos, char in enumerate(desc):
        # A digit followed by another digit or a period starts the next
        # marker, which ends the entry currently being captured.
        if char.isdigit() and pos < last_index:
            following = desc[pos + 1]
            if following.isdigit() or following == '.':
                capturing = False
                if current:
                    entries.append(current)
                current = ''
        # The period right after a number begins a new entry.
        if char == '.' and pos > 0 and desc[pos - 1].isdigit():
            capturing = True
        if capturing:
            current += char
    # Keep the final entry, which has no following marker to terminate it.
    if current:
        entries.append(current)
    return entries


def MTM(maxbands):
    """Scrape artist/song pairs from the KEXP "Music That Matters" feed.

    Each feed item's description is scanned for numbered lines, which are
    split into artist and song on a hyphen or, failing that, an en dash.

    Args:
        maxbands: Maximum number of bands to return.

    Returns:
        A list of at most ``maxbands`` ``band`` objects with ``appeared`` set
        to ``'KEXP Music That Matters'``, de-duplicated with ``band`` equality
        in first-seen order. The returned entries are also printed.
    """
    url = 'http://feeds.kexp.org/kexp/musicthatmatters'
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = urllib.request.Request(url, headers=hdr)
    # Parse inside the ``with`` so the response is closed afterwards.
    with urllib.request.urlopen(req) as page:
        bs = BeautifulSoup(page, "html.parser")

    allbands = []

    for item in bs.findAll('item'):
        # Honour the caller's limit; no more items are read once it is met.
        if len(allbands) >= maxbands:
            break
        description = item.find('description')
        if description is None:
            continue

        for entry in _numbered_entries(description.text):
            # Drop the leading period and the character after it, then cut
            # at the first "<" (presumably where markup starts).
            h = entry[2:].strip().split('<')[0]
            egg = h.split('-')
            if len(egg) < 2:
                egg = h.split('–')

            try:
                artist = egg[0].strip()
                song = egg[1].strip()
            except IndexError as e:
                # No separator found: report the line and skip it.
                print(str(e))
                try:
                    print(h)
                except Exception:
                    print('unprintable')
                continue

            allbands.append(band(name=artist,
                                 appeared='KEXP Music That Matters',
                                 song=song))

    unique = []
    for candidate in allbands:
        if candidate not in unique:
            unique.append(candidate)

    for entry in unique[:maxbands]:
        print(entry.name, entry.song)

    return unique[:maxbands]