def top20_fans(aid): # 男女粉丝的粉丝数TOP10 db = pymysql.connect(**getMySqlTx()) cursor = db.cursor() m_data = [] f_data = [] cursor.execute("SELECT followeds FROM {}_FansInfo WHERE gender = 1 GROUP BY followeds ORDER BY followeds DESC LIMIT 21;".format(aid)) for i,item in enumerate(cursor.fetchall()): if i != 0: m_data.append(item[0]) cursor.execute("SELECT followeds FROM {}_FansInfo WHERE gender = 2 GROUP BY followeds ORDER BY followeds DESC LIMIT 20;".format(aid)) for item in cursor.fetchall(): f_data.append(item[0]) db.close() c = ( Line(init_opts=opts.InitOpts(width='1900px', height='900px', page_title='男女粉丝的粉丝数TOP20'.format(aid))) .add_xaxis([str(i) for i in range(1, 21)]) .add_yaxis("男性", m_data) .add_yaxis("女性", f_data) .set_global_opts( title_opts=opts.TitleOpts(title="男女粉丝的粉丝数TOP20"), toolbox_opts=opts.ToolboxOpts(is_show=True), tooltip_opts=opts.TooltipOpts(is_show=True) ) ) c.render(os.path.join(BASE_PATH, 'Pyecharts_htmls\{}_男女粉丝的粉丝数TOP20.html'.format(aid)))
def get_artist_id(name, api): ''' 搜索歌手 :param name: 歌手名 :param api: api :return: 元组:(歌手ID,歌手名) ''' url_search_artist = 'http://localhost:3000/search?type=100&keywords={}'.format( name) resp = requests.get(url=url_search_artist) try: db = pymysql.connect(getMySqlTx()) cursor = db.cursor() artist = resp.json()['result']['artists'][0] try: cursor.execute("INSERT INTO T_Artists VALUES ({}, '{}')".format( artist['id'], artist['name'])) except Exception as e: print('INSERT INTO T_Artists' + str(e)) db.rollback() db.commit() except Exception as e: print(e) api.stopApi() api.startApi() return get_artist_id(name, api) finally: db.close() return (artist['id'], artist['name'])
def init_db(): ''' 初始化数据库 :return: None ''' db = pymysql.connect(**getMySqlTx()) cursor = db.cursor() #删除存在表 cursor.execute('drop table if exists T_Artists') cursor.execute('drop table if exists T_HotSongs') cursor.execute('drop table if exists T_FansInfo') #创建表 cursor.execute("CREATE TABLE T_Artists( \ id INT NOT NULL COMMENT 'aid 歌手ID' , \ name VARCHAR(128) COMMENT 'aname 歌手名' , \ PRIMARY KEY (id) \ ) COMMENT = '歌手表 ';;") cursor.execute("CREATE TABLE T_HotSongs( \ id INT NOT NULL COMMENT 'sid 歌曲ID' , \ rating INT COMMENT 'rating 热度排名' , \ name VARCHAR(128) COMMENT 'name 歌曲名' , \ aid INT COMMENT 'artist_id 歌手ID' , \ PRIMARY KEY (id) \ ) COMMENT = '热门歌曲表 ';;") db.close()
def get_city(aid): ''' 从数据库获取城市信息 :param aid: 歌手ID :return: 城市分布统计:{'北京市':123,...} ''' db = pymysql.connect(**getMySqlTx()) cursor = db.cursor() cursor.execute("SELECT city, COUNT(city) FROM {}_FansInfo WHERE city < 1000000 AND city > 1000 GROUP BY city".format(aid)) loccodes = get_loccodes() citys = {} for item in cursor.fetchall(): try: if citys.get(loccodes[str(item[0])], None) != None: citys[loccodes[str(item[0])]] = citys[loccodes[str(item[0])]] + item[1] else: citys[loccodes[str(item[0])]] = item[1] except: temp = item[0] - (item[0] % 1000) if citys.get(loccodes[str(temp)], None) != None: citys[loccodes[str(temp)]] = citys[loccodes[str(temp)]] + item[1] else: citys[loccodes[str(temp)]] = item[1] db.close() return citys
def request_info(aid, uid, api): ''' 爬取粉丝信息 :param aid: 歌手ID :param uid: 粉丝ID :param api: api :return: None ''' url_get_info = 'http://localhost:3000/user/detail?uid=' + str(uid) db = pymysql.connect(getMySqlTx()) cursor = db.cursor() resp = requests.get(url_get_info) if resp.json()['code'] == 404: cursor.execute("DELETE FROM `{}_FansInfo` WHERE id = {}".format( aid, uid)) db.commit() db.close() return total = resp.json() info = {} try: info['nickname'] = total['profile']['nickname'] info['gender'] = total['profile']['gender'] info['level'] = total['level'] info['city'] = total['profile']['city'] info['followeds'] = total['profile']['followeds'] info['follows'] = total['profile']['follows'] info['playlist'] = total['profile']['playlistCount'] try: cursor.execute( "UPDATE `163music_new`.`{}_FansInfo` SET `nickname` = '{}', `gender` = '{}', `level` = {}, `city` = {}, `followeds` = {}, `follows` = {}, `playlists` = {} WHERE `id` = {};" .format( aid, info['nickname'], info['gender'], info['level'], info['city'], info['followeds'], info['follows'], info['playlist'], uid, )) except Exception as e: print('update error:' + str(e)) db.commit() except Exception as e: print("request_info" + str(e)) api.stopApi() api.startApi() db.close() request_info(aid, uid, api) finally: db.close()
def get_fans_infos_multi_thread(aid, api): ''' 多线程爬取粉丝信息入口 :param aid: 歌手ID :param api: api :return: None ''' db = pymysql.connect(getMySqlTx()) cursor = db.cursor(pymysql.cursors.SSCursor) cursor.execute("SELECT id FROM {}_FansInfo WHERE city IS NULL".format(aid)) with ThreadPoolExecutor(64) as executor: for i in cursor: executor.submit(request_info, aid, i[0], api) db.close()
def get_ids(aid): ''' 获取歌手热门歌曲的ID :param aid: 歌手ID :return: 热门歌曲的ID,列表:[id,id,...] ''' ids = [] db = pymysql.connect(**getMySqlTx()) cursor = db.cursor() cursor.execute("SELECT id FROM `T_HotSongs` WHERE aid = {} ORDER BY rating".format(aid)) for row in cursor.fetchall(): ids.append(row[0]) db.close() return ids
def get_comments(id): ''' 从数据库获取歌曲的所有评论内容 :param id: 歌曲ID :return: 该歌曲的所有评论,列表:[str,str,...] ''' comments = [] db = pymysql.connect(**getMySqlTx()) cursor = db.cursor() cursor.execute("SELECT content FROM {}_Comments".format(id)) for row in cursor.fetchall(): comments.append(row[0]) db.close() return comments
def gender(aid): db = pymysql.connect(**getMySqlTx()) cursor = db.cursor() data = [] cursor.execute("SELECT gender,COUNT(*) FROM {}_FansInfo GROUP BY gender".format(aid)) for item in cursor.fetchall(): if item[0] == '0': data.append(('保密', item[1])) elif item[0] == '1': data.append(('男', item[1])) else: data.append(('女', item[1])) db.close() c = ( Pie(init_opts=opts.InitOpts(width='1900px', height='900px', page_title='粉丝性别分布'.format(aid))) .add('性别分布', data) .set_global_opts(title_opts=opts.TitleOpts(title="性别分布")) ) c.render(os.path.join(BASE_PATH, 'Pyecharts_htmls\{}_性别分布.html'.format(aid)))
def level(aid): db = pymysql.connect(**getMySqlTx()) cursor = db.cursor() data = [] cursor.execute("SELECT `level`,COUNT(*) FROM {}_FansInfo GROUP BY `level`".format(aid)) for item in cursor.fetchall(): data.append((str(item[0]) + '级', item[1])) db.close() c = ( Funnel(init_opts=opts.InitOpts(width='1900px', height='900px', page_title='粉丝等级分布'.format(aid))) .add( "等级分布", data, sort_="ascending", label_opts=opts.LabelOpts(position="inside"), ) .set_global_opts(title_opts=opts.TitleOpts(title="粉丝等级分布")) ) c.render(os.path.join(BASE_PATH, 'Pyecharts_htmls\{}_粉丝等级分布.html'.format(aid)))
def request_comment(url, song_info, api): ''' 获取歌曲评论 :param url: 需要爬取的url :param song_info: 歌曲信息即,get_songs返回的列表的item:{'rating':歌曲热门度, 'aid':歌手ID, 'sid':歌曲ID, 'sname':歌曲名} :param api: api :return: None ''' db = pymysql.connect(getMySqlTx()) resp = requests.get(url=url) comments = [] try: for item in resp.json()['comments']: comments.append([ item['commentId'], song_info['sid'], item['likedCount'], item['user']['userId'], item['user']['nickname'], item['content'] ]) except Exception as e: print(e) api.stopApi() api.startApi() request_comment(url, song_info, api) cursor = db.cursor() sql_comments = "INSERT INTO " + str( song_info['sid']) + "_Comments VALUES (%s, %s, %s, %s, %s, %s)" for comment in comments: try: cursor.execute(sql_comments, comment) except Exception as e: print('INSERT INTO Comments' + str(e)) db.rollback() db.commit() try: cursor.execute("INSERT INTO `163music_new`.`"+ str(song_info['aid']) + "_FansInfo`(`id`, `nickname`, `level`, `city`, `followeds`, `follows`, `playlists`) " \ "VALUES ({}, NULL, NULL, NULL, NULL, NULL, NULL);".format(comment[3])) except Exception as e: pass db.commit() db.close()
def get_songs(artist_info, api): ''' 获取歌手的前50首热门歌曲 :param artist_info: 歌手信息,即get_artist_id返回的元组:(歌手ID,歌手名) :param api: api :return: 列表:[{'rating':歌曲热门度, 'aid':歌手ID, 'sid':歌曲ID, 'sname':歌曲名},...] ''' url_get_artist_songs = 'http://localhost:3000/artists?id={}'.format( artist_info[0]) resp = requests.get(url=url_get_artist_songs) songs = [] try: for index, item in enumerate(resp.json()['hotSongs']): songs.append({ 'rating': index + 1, 'aid': artist_info[0], 'sid': item['id'], 'sname': item['name'] }) # songs.append(models.HotSongs(id = item['id'], rating = index + 1, name = item['name'], artist_id = artist_info[0])) db = pymysql.connect(getMySqlTx()) cursor = db.cursor() for song in songs: try: cursor.execute( "INSERT INTO T_HotSongs VALUES ({}, {}, '{}', {})".format( song['sid'], song['rating'], song['sname'], song['aid'])) except Exception as e: print('INSERT INTO T_HotSongs' + str(e)) db.rollback() db.commit() except Exception as e: print('get_songs' + str(e)) api.stopApi() api.startApi() return get_songs(artist_info, api) finally: db.close() return songs