#!/usr/bin/python
# -*- coding: utf-8 -*-

import threading

import common
import pymongo

# String identifiers used by this module; presumably MongoDB collection
# names (pymongo is imported), but their use is outside this excerpt.
artist_ = 'artist8'
song_ = 'song9'
artist_log_ = 'artist_log9'
failed_ = 'failed'

# Threshold compared against artist-map values in ``db.mode2``.  Obtained
# through ``common.get_argv('-hot', 10000)`` -- presumably a command-line
# option with 10000 as the fallback; confirm against ``common``.
hot = int(common.get_argv('-hot', 10000))


class db(object):
    """Artist selection helpers.

    Only ``mode`` and ``mode2`` are visible in this excerpt.  Both read the
    name-mangled attributes ``__artist_map`` and ``__artist_log_map``, which
    are not assigned anywhere in the visible code and must be populated by
    other parts of the class.
    """

    def _unlogged_artists(self, accepts):
        """Return the keys of the artist map that pass the shared filter.

        A key is kept when it is truthy, is absent from the artist-log map,
        its mapped value is truthy, and ``accepts(value)`` is true.  The
        checks run in exactly that order and short-circuit, so ``accepts``
        never sees a falsy value.
        """
        return [
            key
            for key, value in self.__artist_map.items()
            if key
            and key not in self.__artist_log_map
            and value
            and accepts(value)
        ]

    def mode(self):
        """Return unlogged artist-map keys whose value equals ``-1``."""
        return self._unlogged_artists(lambda value: value == -1)

    def mode2(self):
        """Return unlogged artist-map keys whose value is greater than ``hot``."""
        # ``hot`` is resolved at call time, exactly like a direct global read.
        return self._unlogged_artists(lambda value: value > hot)
reload(sys) sys.setdefaultencoding('utf8') import os, time, math, pdb, threading import common, db, job_artist, job_song, job_hotnum #just once db_ = db.db('mongodb://192.168.20.66:27017/', 'local') rs = job_artist.Start(db_) artist_list = db_.mode() count = len(artist_list) t = int(common.get_argv('-t', 1)) print 'Thread : ' + str(t) b = int(math.ceil(count / t)) threads = [] if '--debug' in sys.argv: job_hotnum.Start(db_, artist_list) else: print 't is %d, b is %d' % (t, b) for i in range(0, t): begin = b * i end = b * (i + 1) if end >= count:
def Start(db_, artist_list):
    """Prepare crawl state and define the ``Find_Song_Link`` tag callback.

    The visible part of this function builds the URL templates, reads the
    ``-h``, ``-order``, ``-dwnn`` and ``-esh`` settings, creates two
    ``common.Downloader`` objects and one ``common.ElsUploader``, installs
    an ``evtExpire`` callback on each, and defines ``Find_Song_Link``.
    ``artist_list`` and ``GetSongs_URL_Template_`` are not used in the
    visible portion.

    Args:
        db_: storage object; ``add_song`` and ``add_failed`` are called on it.
        artist_list: not referenced in the visible portion.
    """
    GetSongs_URL_Template_ = 'http://music.baidu.com/data/user/getsongs?start=%s&ting_uid=%s&order=hot'
    SongLink_URL_Template_ = 'http://play.baidu.com/data/music/songlink?songIds=%s'
    PRE_URL_ = 'http://play.baidu.com'

    # One-element lists serve as mutable cells that the nested callbacks can
    # update in place (Python 2 has no ``nonlocal``).
    Find_Song_Switch_ = [False]
    Artist_Id_ = ''
    Order_ = [0]
    SongNameMap = {}

    # ``-h <host:port>`` overrides the default host.  Index 0 is ignored and
    # a value must follow the flag.
    h = '127.0.0.1:8098'
    if '-h' in sys.argv:
        h_index = sys.argv.index('-h')
        if 0 < h_index < len(sys.argv) - 1:
            h = sys.argv[h_index + 1]

    order = int(common.get_argv('-order', 25))
    RIAK_HOSTNAME = h
    RIAK_URL_TEMPLATE = '/buckets/music/keys/%s'
    RIAK_LRC_URL_TEMPLATE = '/buckets/lrc/keys/%s'
    dwnn = int(common.get_argv('-dwnn', 25))
    ELS_HOSTNAME = str(common.get_argv('-esh', 'localhost:9200'))
    ELS_URL_TEMPLATE = '/local/music/%s'

    dwn_music = [common.Downloader(RIAK_HOSTNAME, RIAK_URL_TEMPLATE, dwnn)]
    dwn_lrc = [common.Downloader(RIAK_HOSTNAME, RIAK_LRC_URL_TEMPLATE, dwnn)]
    elsup = [common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)]

    def _recycler(cell, build):
        # Build an ``evtExpire`` callback for ``cell``: close the current
        # occupant, replace it with a fresh object from ``build()`` and hook
        # the same callback onto the replacement.
        def destruct(this_):
            cell[0].close()
            cell[0] = build()
            cell[0].evtExpire = destruct
        return destruct

    elsup_destruct = _recycler(
        elsup,
        lambda: common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn))
    elsup[0].evtExpire = elsup_destruct

    dwn_music_destruct = _recycler(
        dwn_music,
        lambda: common.Downloader(RIAK_HOSTNAME, RIAK_URL_TEMPLATE, dwnn))
    dwn_music[0].evtExpire = dwn_music_destruct

    dwn_lrc_destruct = _recycler(
        dwn_lrc,
        lambda: common.Downloader(RIAK_HOSTNAME, RIAK_LRC_URL_TEMPLATE, dwnn))
    dwn_lrc[0].evtExpire = dwn_lrc_destruct

    def _transfer_with_retry(cell, url, song_id, mime, retry_msg, fail_code):
        # Try ``cell[0].transfer`` up to three times and record a failure in
        # the database when every attempt returns a falsy result.  The cell
        # is re-read on each attempt because an expiry callback may have
        # swapped in a new object in the meantime.
        for attempt in range(3):
            if attempt:
                common.log(retry_msg % (song_id, attempt))
            if cell[0].transfer(url, song_id, mime):
                return
        db_.add_failed(url, song_id, mime, fail_code)

    def Find_Song_Link(tag, attrs):
        """Handle one start tag: store the song behind an ``<a href>`` link.

        For every ``href`` attribute containing ``/song/`` the id segment
        after that marker is looked up through ``SongLink_URL_Template_``.
        The first entry of the returned ``songList`` is stored, uploaded
        and downloaded, provided its name has not been seen before, fewer
        than ``order`` songs have been stored, and it has a song link.
        Any exception is logged through ``common.log`` and swallowed.
        """
        # NOTE(review): the nesting of the statements after ``add_song`` was
        # reconstructed from whitespace-collapsed source; in particular the
        # level of ``Find_Song_Switch_[0] = True`` should be confirmed.
        try:
            if tag != 'a':
                return
            for k, v in attrs:
                if not (k and k == 'href' and v):
                    continue
                marker_at = v.find('/song/')
                if marker_at == -1:
                    continue
                # Keep only the path segment that directly follows '/song/'.
                song_ref = v[marker_at + len('/song/'):].split('/', 1)[0]

                raw_content = common.http_read(SongLink_URL_Template_ % song_ref)
                if raw_content is None:
                    continue
                songList = json.loads(raw_content)['data']['songList']
                if len(songList) == 0:
                    continue

                entry = songList[0]
                songId = entry['songId']
                songName = entry['songName']
                lrclink = PRE_URL_ + entry['lrcLink']
                songlink = entry['songLink']
                rate = entry['rate']
                size = entry['size']
                artist_id = Artist_Id_

                # Each song name is handled at most once per ``Start`` call.
                if songName in SongNameMap:
                    continue
                SongNameMap[songName] = None

                if not (order > Order_[0] and songlink and songlink != ''):
                    continue

                db_.add_song(songId, songName, lrclink, songlink, rate, size,
                             artist_id, Order_[0])
                obj = {
                    "songId": songId,
                    "songName": songName,
                    "rate": rate,
                    "size": size,
                    "order": Order_[0],
                    "artistId": artist_id
                }
                elsup[0].transfer(json.dumps(obj), songId)

                _transfer_with_retry(
                    dwn_music, songlink, songId, 'audio/mpeg',
                    'try download music %s again, time: %d', 1)
                if lrclink.endswith('.lrc'):
                    _transfer_with_retry(
                        dwn_lrc, lrclink, songId, 'text/plain',
                        'try download lrc %s again, time: %d', 2)

                Order_[0] += 1
                print('song %d has been saved.' % songId)
                Find_Song_Switch_[0] = True
        except Exception as e:
            common.log('Find_Song_Link: ' + str(e))
#!/usr/bin/python # -*- coding: utf-8 -*- import sys reload(sys) sys.setdefaultencoding('utf8') from HTMLParser import HTMLParser import urllib, json import common, db dwnn = int(common.get_argv('-dwnn', 25)) ELS_HOSTNAME = str(common.get_argv('-esh', 'localhost:9200')) ELS_URL_TEMPLATE = '/local/artist/%s' elsup = [common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)] def elsup_destruct(this_): elsup[0].close() elsup[0] = common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn) elsup[0].evtExpire = elsup_destruct elsup[0].evtExpire = elsup_destruct class HotNumParser(HTMLParser): def __init__(self):
#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys

# Python 2 only: ``site`` removes ``sys.setdefaultencoding`` at startup, so
# the module is reloaded to get it back before switching the process-wide
# default encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

from HTMLParser import HTMLParser
import urllib
import json
import common
import db

# Settings obtained through ``common.get_argv(name, default)``.
dwnn = int(common.get_argv('-dwnn', 25))
ELS_HOSTNAME = str(common.get_argv('-esh', 'localhost:9200'))
ELS_URL_TEMPLATE = '/local/artist/%s'

# The uploader lives in a one-element list so that the expiry callback can
# replace it in place for every user of ``elsup[0]``.
elsup = [common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)]


def elsup_destruct(this_):
    """``evtExpire`` callback: close the current uploader and install a new one.

    ``this_`` is accepted for the callback signature but not used; the
    uploader currently held in ``elsup[0]`` is the one that gets closed.
    """
    elsup[0].close()
    replacement = common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)
    # Hook the callback before publishing so the new uploader is never
    # visible without it.
    replacement.evtExpire = elsup_destruct
    elsup[0] = replacement


elsup[0].evtExpire = elsup_destruct


class HotNumParser(HTMLParser):
    """HTML parser carrying ``processing`` and ``hotnum`` state.

    Only the constructor is visible in this excerpt; the handlers that
    update the two attributes are not shown.
    """

    def __init__(self):
        # Initial state: nothing being processed, hot number of zero.
        self.processing = None
        self.hotnum = 0
        # Under Python 2 ``HTMLParser`` is an old-style class, so the base
        # initializer is called explicitly rather than through ``super``.
        HTMLParser.__init__(self)
def Start(db_, artist_list):
    """Prepare crawl state and define the ``Find_Song_Link`` tag callback.

    The visible part of this function builds the URL templates, reads the
    ``-h``, ``-order``, ``-dwnn`` and ``-esh`` settings, creates two
    ``common.Downloader`` objects and one ``common.ElsUploader``, installs
    an ``evtExpire`` callback on each, and defines ``Find_Song_Link``.
    ``artist_list`` and ``GetSongs_URL_Template_`` are not used in the
    visible portion.

    Args:
        db_: storage object; ``add_song`` and ``add_failed`` are called on it.
        artist_list: not referenced in the visible portion.
    """
    GetSongs_URL_Template_ = "http://music.baidu.com/data/user/getsongs?start=%s&ting_uid=%s&order=hot"
    SongLink_URL_Template_ = "http://play.baidu.com/data/music/songlink?songIds=%s"
    PRE_URL_ = "http://play.baidu.com"

    # One-element lists serve as mutable cells that the nested callbacks can
    # update in place (Python 2 has no ``nonlocal``).
    Find_Song_Switch_ = [False]
    Artist_Id_ = ""
    Order_ = [0]
    SongNameMap = {}

    # ``-h <host:port>`` overrides the default host.  Index 0 is ignored and
    # a value must follow the flag.
    h = "127.0.0.1:8098"
    if "-h" in sys.argv:
        h_index = sys.argv.index("-h")
        if 0 < h_index < len(sys.argv) - 1:
            h = sys.argv[h_index + 1]

    order = int(common.get_argv("-order", 25))
    RIAK_HOSTNAME = h
    RIAK_URL_TEMPLATE = "/buckets/music/keys/%s"
    RIAK_LRC_URL_TEMPLATE = "/buckets/lrc/keys/%s"
    dwnn = int(common.get_argv("-dwnn", 25))
    ELS_HOSTNAME = str(common.get_argv("-esh", "localhost:9200"))
    ELS_URL_TEMPLATE = "/local/music/%s"

    dwn_music = [common.Downloader(RIAK_HOSTNAME, RIAK_URL_TEMPLATE, dwnn)]
    dwn_lrc = [common.Downloader(RIAK_HOSTNAME, RIAK_LRC_URL_TEMPLATE, dwnn)]
    elsup = [common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)]

    def _recycler(cell, build):
        # Build an ``evtExpire`` callback for ``cell``: close the current
        # occupant, replace it with a fresh object from ``build()`` and hook
        # the same callback onto the replacement.
        def destruct(this_):
            cell[0].close()
            cell[0] = build()
            cell[0].evtExpire = destruct

        return destruct

    elsup_destruct = _recycler(
        elsup,
        lambda: common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn),
    )
    elsup[0].evtExpire = elsup_destruct

    dwn_music_destruct = _recycler(
        dwn_music,
        lambda: common.Downloader(RIAK_HOSTNAME, RIAK_URL_TEMPLATE, dwnn),
    )
    dwn_music[0].evtExpire = dwn_music_destruct

    dwn_lrc_destruct = _recycler(
        dwn_lrc,
        lambda: common.Downloader(RIAK_HOSTNAME, RIAK_LRC_URL_TEMPLATE, dwnn),
    )
    dwn_lrc[0].evtExpire = dwn_lrc_destruct

    def _transfer_with_retry(cell, url, song_id, mime, retry_msg, fail_code):
        # Try ``cell[0].transfer`` up to three times and record a failure in
        # the database when every attempt returns a falsy result.  The cell
        # is re-read on each attempt because an expiry callback may have
        # swapped in a new object in the meantime.
        for attempt in range(3):
            if attempt:
                common.log(retry_msg % (song_id, attempt))
            if cell[0].transfer(url, song_id, mime):
                return
        db_.add_failed(url, song_id, mime, fail_code)

    def Find_Song_Link(tag, attrs):
        """Handle one start tag: store the song behind an ``<a href>`` link.

        For every ``href`` attribute containing ``/song/`` the id segment
        after that marker is looked up through ``SongLink_URL_Template_``.
        The first entry of the returned ``songList`` is stored, uploaded
        and downloaded, provided its name has not been seen before, fewer
        than ``order`` songs have been stored, and it has a song link.
        Any exception is logged through ``common.log`` and swallowed.
        """
        # NOTE(review): the nesting of the statements after ``add_song`` was
        # reconstructed from whitespace-collapsed source; in particular the
        # level of ``Find_Song_Switch_[0] = True`` should be confirmed.
        try:
            if tag != "a":
                return
            for k, v in attrs:
                if not (k and k == "href" and v):
                    continue
                marker_at = v.find("/song/")
                if marker_at == -1:
                    continue
                # Keep only the path segment that directly follows "/song/".
                song_ref = v[marker_at + len("/song/"):].split("/", 1)[0]

                raw_content = common.http_read(SongLink_URL_Template_ % song_ref)
                if raw_content is None:
                    continue
                songList = json.loads(raw_content)["data"]["songList"]
                if len(songList) == 0:
                    continue

                entry = songList[0]
                songId = entry["songId"]
                songName = entry["songName"]
                lrclink = PRE_URL_ + entry["lrcLink"]
                songlink = entry["songLink"]
                rate = entry["rate"]
                size = entry["size"]
                artist_id = Artist_Id_

                # Each song name is handled at most once per ``Start`` call.
                if songName in SongNameMap:
                    continue
                SongNameMap[songName] = None

                if not (order > Order_[0] and songlink and songlink != ""):
                    continue

                db_.add_song(songId, songName, lrclink, songlink, rate, size, artist_id, Order_[0])
                obj = {
                    "songId": songId,
                    "songName": songName,
                    "rate": rate,
                    "size": size,
                    "order": Order_[0],
                    "artistId": artist_id,
                }
                elsup[0].transfer(json.dumps(obj), songId)

                _transfer_with_retry(
                    dwn_music,
                    songlink,
                    songId,
                    "audio/mpeg",
                    "try download music %s again, time: %d",
                    1,
                )
                if lrclink.endswith(".lrc"):
                    _transfer_with_retry(
                        dwn_lrc,
                        lrclink,
                        songId,
                        "text/plain",
                        "try download lrc %s again, time: %d",
                        2,
                    )

                Order_[0] += 1
                print("song %d has been saved." % songId)
                Find_Song_Switch_[0] = True
        except Exception as e:
            common.log("Find_Song_Link: " + str(e))