Пример #1
0
def album_data():
    """Scrape one album record per album link found on the chart list page.

    For every chart row, the album link's ``href`` is parsed for the album
    id, the album detail page is fetched for the release date, genre,
    publisher and agency fields, and the rating JSON endpoint is fetched for
    the average score.

    Returns:
        list[list]: ``[album_id, album_title, album_genre, rating, releasedt,
        album_comp, entertainment]`` per album link, where ``rating`` is the
        score formatted with two decimals. The four detail fields are
        ``None`` when the detail page has no ``div.meta dl.list`` element.
    """
    url = "http://vlg.berryservice.net:8099/melon/list"
    trs = mf.request(url).select('tbody tr[data-song-no]')

    # The album id is taken from the text between single quotes in the href.
    # Compiled once instead of once per link.
    id_pattern = re.compile(r"'(.*)'")

    records = []
    b = 0
    for tr in trs:
        album_links = tr.select('td:nth-of-type(4) a')
        album_title = tr.select_one('div.ellipsis.rank03 a').text

        for link in album_links:
            album_id = id_pattern.findall(link.attrs['href'])[0]

            # Album detail page and album rating JSON endpoint.
            album_url = "http://vlg.berryservice.net:8099/melon/detail?albumId={}".format(
                album_id)
            json_url = "http://vlg.berryservice.net:8099/melon/albumratejson?albumId={}".format(
                album_id)

            # Reset per album: without this, a detail page lacking the
            # metadata list would silently reuse the previous album's values
            # (or raise NameError on the very first album).
            releasedt = album_genre = album_comp = entertainment = None

            # Release date, genre, publisher and agency from the detail page.
            entry = mf.request(album_url).select_one('div.entry')
            for dl in entry.select('div.meta dl.list'):
                releasedt = dl.select_one('dd:nth-of-type(1)').text
                album_genre = dl.select_one('dd:nth-of-type(2)').text
                album_comp = dl.select_one('dd:nth-of-type(3)').text
                entertainment = dl.select_one('dd:nth-of-type(4)').text
                b += 1

            # Average rating. json.loads() no longer accepts an ``encoding``
            # keyword (removed in Python 3.9); the text is already decoded.
            rating_json = requests.get(json_url).text
            rating = json.loads(rating_json)['infoGrade']['TOTAVRGSCORE']

            # Gather every column for this album.
            records.append([
                album_id, album_title, album_genre,
                "{:.02f}".format(float(rating)), releasedt, album_comp,
                entertainment,
            ])
            print("Album ----->", b, "record  --> done!")
    print("Album_data has been downloaded!!!!")
    return records
Пример #2
0
def songsinger():
    """Pair chart songs with the ids of singers stored in the Singer table.

    The chart list page supplies each row's ``data-song-no`` and singer text;
    the ``Singer`` table supplies ``(artist_id, name)`` rows. A pair is
    produced whenever a stored singer name occurs as a substring of a song's
    singer text.

    Returns:
        list[list]: ``[song_no, artist_id]`` pairs, ordered by chart row and
        then by the order the Singer rows were fetched.
    """
    url = "http://vlg.berryservice.net:8099/melon/list"
    sel = "#frm table tbody tr "
    rows = mf.request(url).select(sel)

    songs = [
        [row.attrs["data-song-no"], row.select_one('div.rank02 span').text]
        for row in rows
    ]

    # Fetch id and name for every singer in the Singer table.
    # NOTE(review): assumes mf.get_conn returns a fresh DB-API connection
    # owned by this call, so it is closed here instead of being leaked --
    # confirm it is not a shared/pooled handle.
    conn = mf.get_conn('melondb')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('''select artist_id, name from Singer''')
            lines = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

    # Keep a (song, singer) pair when the stored singer name appears in the
    # singer text scraped for the song.
    data = []
    for song in songs:
        for singer_row in lines:
            if singer_row[1] in song[1]:
                data.append([song[0], singer_row[0]])
                print("MS_Song===>>", song, "\nSinger====>>>", singer_row)
    return data
Пример #3
0
def songsinger():
    """Pair singers stored in the Singer table with the chart songs they appear on.

    The chart list page supplies each row's ``data-song-no`` and singer text;
    the ``Singer`` table supplies ``(artist_id, name)`` rows. A pair is
    produced whenever a stored singer name occurs as a substring of a song's
    singer text.

    Returns:
        list[list]: ``[artist_id, song_no]`` pairs, ordered by chart row and
        then by the order the Singer rows were fetched.
    """
    url = "http://vlg.berryservice.net:8099/melon/list"
    sel_song = "#frm table tbody tr "
    rows = mf.request(url).select(sel_song)

    songs = [
        [row.attrs["data-song-no"], row.select_one('div.rank02 span').text]
        for row in rows
    ]

    # NOTE(review): assumes mf.get_conn returns a fresh DB-API connection
    # owned by this call, so it is closed here instead of being leaked --
    # confirm it is not a shared/pooled handle.
    conn = mf.get_conn('melondb')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('''select artist_id, name from Singer''')
            lines = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

    # Keep a (singer, song) pair when the stored singer name appears in the
    # singer text scraped for the song.
    data = []
    for song in songs:
        for singer_row in lines:
            if singer_row[1] in song[1]:
                data.append([singer_row[0], song[0]])
                print("MS_Song===>>", song, "\nSinger====>>>", singer_row)

    return data
Пример #4
0
def song_data():
    """Scrape song id, title, genre and album id for every chart row.

    The chart list page provides the song number, title and album link of
    each row; the genre is then read from each song's detail page.

    Returns:
        list[list]: ``[song_no, title, genre, album_id]``, one entry per
        ``div.meta`` block matched on a song's detail page (a song whose
        detail page matches none contributes no entry).
    """
    url = "http://vlg.berryservice.net:8099/melon/list"
    sel_song = "#frm table tbody tr "
    rows = mf.request(url).select(sel_song)

    # The album id is taken from the text between single quotes in the href.
    id_pattern = re.compile(r"'(.*)'")

    # Song id, song title and album id from the index page.
    songs = []
    for row in rows:
        song_number = row.attrs["data-song-no"]
        title = row.select_one('div.rank01 span a').text
        album_href = row.select_one('div.ellipsis.rank03 a').attrs['href']
        album_id = id_pattern.findall(album_href)[0]
        songs.append((song_number, title, album_id))

    # Genre from each song's detail page.
    meta_selector = "#downloadfrm div.wrap_info div.entry div.meta"
    lst = []
    for num, (song_number, title, album_id) in enumerate(songs, start=1):
        url2 = "http://vlg.berryservice.net:8099/melon/songdetail?songId=" + song_number

        for meta in mf.request(url2).select(meta_selector):
            # Use the genre just scraped rather than indexing a parallel list
            # by song position: that list drifts out of step as soon as a
            # detail page matches zero or several meta blocks.
            genre = meta.select_one('dl.list dd:nth-of-type(3)').text
            lst.append([song_number, title, genre, album_id])
            print('Song ----->', num, 'record  --> done!')
    print("Finished Crawling Songs!!!!!")

    return lst
Пример #5
0
def album_data():
    """Scrape one album record per album link found on the chart list page.

    For every chart row, the album link's ``href`` is parsed for the album
    id, the album detail page is fetched for the release date, genre,
    publisher and agency fields, and the rating JSON endpoint is queried
    through ``mf.requestJson`` for the average score.

    Returns:
        list[list]: ``[album_id, album_title, album_genre, rating, releasedt,
        album_comp, entertainment]`` per album link, where ``rating`` is the
        score formatted with two decimals. The four detail fields are
        ``None`` when the detail page has no ``div.meta dl.list`` element.
    """
    url = "http://vlg.berryservice.net:8099/melon/list"
    trs = mf.request(url).select('tbody tr[data-song-no]')

    # The album id is taken from the text between single quotes in the href.
    # Compiled once instead of once per link.
    id_pattern = re.compile(r"'(.*)'")

    records = []
    b = 0
    for tr in trs:
        album_links = tr.select('td:nth-of-type(4) a')
        album_title = tr.select_one('div.ellipsis.rank03 a').text

        for link in album_links:
            album_id = id_pattern.findall(link.attrs['href'])[0]

            # Album detail page and album rating JSON endpoint (the stray
            # leading space in the rating URL has been removed).
            album_url = "http://vlg.berryservice.net:8099/melon/detail?albumId={}".format(album_id)
            json_url = "http://vlg.berryservice.net:8099/melon/albumratejson?albumId={}".format(album_id)

            # Reset per album: without this, a detail page lacking the
            # metadata list would silently reuse the previous album's values
            # (or raise NameError on the very first album).
            releasedt = album_genre = album_comp = entertainment = None

            # Release date, genre, publisher and agency from the detail page.
            entry = mf.request(album_url).select_one('div.entry')
            for dl in entry.select('div.meta dl.list'):
                releasedt = dl.select_one('dd:nth-of-type(1)').text
                album_genre = dl.select_one('dd:nth-of-type(2)').text
                album_comp = dl.select_one('dd:nth-of-type(3)').text
                entertainment = dl.select_one('dd:nth-of-type(4)').text
                b += 1

            # Average rating of the album.
            rating = mf.requestJson(json_url)['infoGrade']['TOTAVRGSCORE']

            # Gather every column for this album.
            records.append([
                album_id, album_title, album_genre,
                "{:.02f}".format(float(rating)), releasedt, album_comp,
                entertainment,
            ])
            print("Album ----->", b, "record  --> done!")
    print("Album_data has been downloaded!!!!")
    return records
Пример #6
0
def song_data():
    """Scrape song id, title, genre and album id for every chart row.

    The chart list page provides the song number, title and album link of
    each row; the genre is then read from each song's detail page. The
    collected list is printed before it is returned.

    Returns:
        list[list]: ``[song_no, title, genre, album_id]``, one entry per
        ``div.meta`` block matched on a song's detail page (a song whose
        detail page matches none contributes no entry).
    """
    url = "http://vlg.berryservice.net:8099/melon/list"
    sel_song = "#frm table tbody tr "
    rows = mf.request(url).select(sel_song)

    # The album id is taken from the text between single quotes in the href.
    id_pattern = re.compile(r"'(.*)'")

    # Song id, song title and album id from the index page.
    songs = []
    for row in rows:
        song_number = row.attrs["data-song-no"]
        title = row.select_one('div.rank01 span a').text
        album_href = row.select_one('div.ellipsis.rank03 a').attrs['href']
        album_id = id_pattern.findall(album_href)[0]
        songs.append((song_number, title, album_id))

    # Genre from each song's detail page.
    meta_selector = "#downloadfrm div.wrap_info div.entry div.meta"
    lst = []
    for num, (song_number, title, album_id) in enumerate(songs, start=1):
        url2 = "http://vlg.berryservice.net:8099/melon/songdetail?songId=" + song_number

        for meta in mf.request(url2).select(meta_selector):
            # Use the genre just scraped rather than indexing a parallel list
            # by song position: that list drifts out of step as soon as a
            # detail page matches zero or several meta blocks.
            genre = meta.select_one('dl.list dd:nth-of-type(3)').text
            lst.append([song_number, title, genre, album_id])
            print('Song ----->', num, 'record  --> done!')
    print("Finished Crawling Songs!!!!!")
    print(lst)

    return lst
Пример #7
0
def singer():
    """Collect ``[singer_id, singer_name]`` for every artist link on the chart list page.

    Each chart row's sixth cell is searched for artist anchors; the id is the
    text between single quotes in the anchor's ``href`` and the name is the
    anchor text.

    Returns:
        list[list]: one ``[singer_id, singer_name]`` entry per artist anchor,
        in page order (duplicates are kept).
    """
    chart_rows = mf.request(
        "http://vlg.berryservice.net:8099/melon/list"
    ).select('tbody tr[data-song-no]')

    quoted = re.compile(r"'(.*)'")
    link_selector = 'td:nth-of-type(6) div.ellipsis.rank02 span a'

    return [
        [quoted.findall(anchor.attrs['href'])[0], anchor.text]
        for row in chart_rows
        for anchor in row.select(link_selector)
    ]
Пример #8
0
def song_rank():
    """Build ranking rows (song id, rank, date, like count) for the chart.

    Song ids and rank texts come from the chart list page; like counts come
    from the ``likejson`` endpoint and are matched to songs by id, so the
    result does not depend on the JSON entries arriving in chart order.

    Returns:
        list[list]: ``[song_no, rank, date, likecnt]`` for every chart row,
        where ``date`` is today's local date as ``YYYYMMDD`` and ``likecnt``
        is ``None`` when the like JSON has no entry for that song.
    """
    now = datetime.datetime.now()

    url = "http://vlg.berryservice.net:8099/melon/list"
    top_list = mf.request(url)
    sel = "#frm table tbody tr "

    # Song id and rank text of every chart row.
    song_no = []
    rank = []
    for row in top_list.select(sel):
        song_no.append(row.attrs["data-song-no"])
        rank.append(row.select_one('div span.rank').text)

    url2 = "http://vlg.berryservice.net:8099/melon/likejson"
    jsonData = mf.requestJson(url2)

    # Like counts keyed by song id. Ids are compared as strings, as before;
    # setdefault keeps the first entry if an id shows up more than once.
    likes_by_song = {}
    for entry in jsonData['contsLike']:
        likes_by_song.setdefault(str(entry['CONTSID']), entry['SUMMCNT'])

    # Date of this update.
    date = now.strftime('%Y%m%d')

    # Assemble one row per scraped song instead of assuming exactly 100, so
    # a shorter chart no longer raises IndexError.
    lst = []
    for position, (number, rank_text) in enumerate(zip(song_no, rank), start=1):
        lst.append([number, rank_text, date, likes_by_song.get(str(number))])
        print("Rank_lst ----->", position, "record  --> done!")

    print("Ranking has been downloaded!!!!")

    return lst
Пример #9
0
from bs4 import BeautifulSoup
import requests
import time
import random
import json
import melon_function as mf
import album
import song_rank as sr
import singer as s
import make_songsinger as ms

# Fetch the chart list page once; its song rows are passed to the album scraper.
url = "http://vlg.berryservice.net:8099/melon/list"
trs = mf.request(url).select('tbody tr[data-song-no]')

# Albums.
# NOTE(review): album.album_data is called with the rows here; the album
# module is not visible in this file -- confirm its signature accepts them.
album_lst = album.album_data(trs)

album_insert = "insert ignore into Album (album_id, album_title, album_genre, rating, releasedt, album_comp, entertainment) values (%s, %s, %s, %s, %s, %s, %s) "
mf.save(album_lst, album_insert)

# Songs.
songs = mf.song_data()
mssong_insert = "insert ignore into MS_Song (song_no, title, genre, album_id) values (%s, %s, %s, %s) "
mf.save(songs, mssong_insert)

# Rankings. `rank` is a reserved word in MySQL 8.0.2 and later, so the column
# name is backtick-quoted; the quoted form is valid on older servers as well.
rank_lst = sr.song_rank()
rank_insert = "insert into Song_Rank (song_no, `rank`, rankdt, likecnt) values (%s, %s, %s, %s) "
mf.save(rank_lst, rank_insert)

# Singers.
singer_id_lst = s.singer()
singer_insert = "insert ignore into Singer(artist_id, name) values(%s, %s)"
mf.save(singer_id_lst, singer_insert)