def view_capture(self, song_id, page=1):
    """Fetch one page of comments for *song_id* and persist them.

    On page 1, previously stored comments for the song are deleted and
    the "hot comments" section (only present on the first page) is also
    captured.  Regular comments are kept only when liked more than 30
    times.  On success the song row is marked over='Y' with its total
    comment count, and the number of remaining pages (total // 20) is
    returned.

    Raises: re-raises any scraping/DB error after rollback and logging.
    """
    if page == 1:
        # Start fresh: drop any comments already stored for this song.
        self.session.query(pysql.Comment163).filter(
            pysql.Comment163.song_id == song_id).delete()
        self.session.commit()
    data = {
        'params': self.createParams(page),
        'encSecKey': self.__encSecKey
    }
    url = default.comment_url.format(str(song_id))
    try:
        req = requests.post(url, headers=self.__headers, data=data,
                            timeout=10)
        payload = req.json()  # parse the response once, not per access
        for comment in payload['comments']:
            if comment['likedCount'] > 30:
                txt = comment['content'].encode('utf-8')
                author = comment['user']['nickname'].encode('utf-8')
                liked = comment['likedCount']
                self.session.add(
                    pysql.Comment163(song_id=song_id, txt=txt,
                                     author=author, liked=liked))
                self.session.flush()
        if page == 1:
            # Hot comments are only returned with the first page.
            for comment in payload['hotComments']:
                txt = comment['content'].encode('utf-8')
                author = comment['user']['nickname'].encode('utf-8')
                liked = comment['likedCount']
                self.session.add(
                    pysql.Comment163(song_id=song_id, txt=txt,
                                     author=author, liked=liked))
                self.session.flush()
        cnt = int(payload['total'])
        self.session.query(pysql.Music163).filter(
            pysql.Music163.song_id == song_id).update({
                'over': 'Y',
                'comment': cnt
            })
        self.session.commit()
        # Floor division keeps the page count an int on both Py2 and Py3.
        return cnt // 20
    except KeyboardInterrupt:
        print("INFO : 解释器请求退出")
        pylog.Log("ERROR 107 : 解释器请求退出")
        exit()
    except Exception:
        self.session.rollback()
        # BUG FIX: the original executed ``raise pylog.Log(...)``, which
        # either raised Log's (None) return value or never reached the
        # Log call at all.  Log first, then re-raise the real exception.
        pylog.Log("ERROR 910 : SONG_ID-" + str(song_id) +
                  " PAGE-" + str(page))
        raise
def view_links(self, song_id):
    """Scrape the song page for *song_id* to discover new songs and playlists.

    Parses the "related" sections of the HTML song page: every linked
    song not yet in table music163 is inserted, and every linked
    playlist not yet in table playlist163 is inserted with cnt=-1
    (play count unknown).  Errors are logged and the session rolled back.
    """
    url = "http://music.163.com/song?id=" + str(song_id)
    data = {'id': str(song_id)}
    headers = {
        'Cookie': 'MUSIC_U=e45797021db3403ab9fffb11c0f70a7994f71177b26efb5169b46948f2f9a60073d23a2665346106c9295f8f6dbb6c7731b299d667364ed3;'
    }
    try:
        req = requests.get(url, headers=headers, data=data, timeout=100)
        sup = BeautifulSoup(req.content, "html.parser")
        for link in sup.find_all('li', class_="f-cb"):
            html = link.find('a', 's-fc1')
            if html is not None:
                title = html.get('title').encode('utf-8')
                # BUG FIX: use a distinct name instead of shadowing the
                # song_id parameter, so the error log below reports the
                # id that was actually requested.
                linked_id = html.get('href')[9:]
                author = link.find('div', 'f-thide s-fc4').find(
                    'span').get('title').encode('utf-8')
                if pysql.single("music163", "song_id", linked_id):
                    self.session.add(
                        pysql.Music163(song_id=linked_id, song_name=title,
                                       author=author))
                    self.session.flush()
        for link in sup.find_all('a', 'sname f-fs1 s-fc0'):
            play_link = link.get("href").replace("/playlist?id=", "")
            play_name = link.get("title").encode('utf-8')
            if pysql.single("playlist163", "link", play_link):
                self.session.add(
                    pysql.Playlist163(title=play_name, link=play_link,
                                      cnt=-1))
                self.session.flush()
    except Exception:
        self.session.rollback()
        pylog.Log("ERROR 917 : VIEW LINK SONG_ID-" + str(song_id))
def view_capture(self, page):
    """Scrape one listing page of public playlists and store new ones.

    Requests the playlist index at offset page*35, extracts each
    playlist's title, link id and play count (the '万' suffix is
    expanded to '0000'), and inserts playlists not already present in
    table playlist163.
    """
    http = requests.session()
    play_url = self.__play_url + str(page * 35)
    try:
        acmsk = {'class': 'msk'}
        scnb = {'class': 'nb'}
        dcu = {'class': 'u-cover u-cover-1'}
        ucm = {'class': 'm-cvrlst f-cb'}
        soup = BeautifulSoup(
            http.get(play_url, headers=self.__headers).content,
            "html.parser")
        lst = soup.find('ul', ucm)
        for play in lst.find_all('div', dcu):
            title = play.find('a', acmsk)['title'].encode('utf-8')
            link = play.find('a', acmsk)['href'].encode('utf-8').replace(
                "/playlist?id=", "")
            cnt = play.find('span', scnb).text.encode('utf-8').replace(
                '万', '0000')
            if pysql.single("playlist163", "link", link):
                pl = pysql.Playlist163(title=title, link=link, cnt=int(cnt))
                self.session.add(pl)
                self.session.commit()
    except Exception:
        # BUG FIX: the original concatenated the int page directly
        # ("..." + page), which raised TypeError inside the handler.
        pylog.Log("抓取歌单出现问题,歌单页码:" + str(page))
def view_lyric(self, song_id):
    """Fetch the lyric for *song_id* and store it if not already present.

    Downloads the lyric JSON, extracts lrc.lyric, and inserts a
    Lyric163 row when table lyric163 has no entry for the song yet.
    Failures are logged; the session is rolled back for consistency
    with the other capture methods.
    """
    url = default.lyric_url.format(str(song_id))
    http = requests.session()
    try:
        soup = BeautifulSoup(
            http.get(url, headers=self.__headers).content, "html.parser")
        lrc = json.loads(soup.text)['lrc']['lyric']
        if pysql.single("lyric163", "song_id", song_id):
            self.session.add(pysql.Lyric163(song_id=song_id, txt=lrc))
            self.session.commit()
    # Narrowed from a bare except so KeyboardInterrupt is not swallowed.
    except Exception:
        self.session.rollback()
        pylog.Log("抓取歌词出现问题,歌曲ID:" + str(song_id))
def view_capture(self, link):
    """Scrape all tracks of playlist *link* and store unseen songs.

    Marks the playlist row over='Y' up front, fetches the playlist JSON,
    and inserts every track whose song_id is not yet in table music163
    (committing per track); already-known tracks are logged as
    "Not Single".  Any scraping/DB failure is rolled back and logged.
    """
    self.session.query(pysql.Playlist163).filter(
        pysql.Playlist163.link == link).update({'over': 'Y'})
    url = self.__url + str(link)
    http = requests.session()
    try:
        soup = BeautifulSoup(
            http.get(url, headers=self.__headers).content, "html.parser")
        musics = json.loads(soup.text)['result']['tracks']
        for music in musics:
            name = music['name'].encode('utf-8')
            author = music['artists'][0]['name'].encode('utf-8')
            if pysql.single("music163", "song_id", (music['id'])):
                self.session.add(
                    pysql.Music163(song_id=music['id'], song_name=name,
                                   author=author))
                self.session.commit()
            else:
                pylog.Log('{} : {} {}'.format("ERROR 103", name,
                                              "Not Single"))
    # Narrowed from a bare except so KeyboardInterrupt is not swallowed;
    # rollback added for consistency with the sibling capture methods.
    except Exception:
        self.session.rollback()
        pylog.Log('{} : {}'.format("Error 901", url))
def curl_playlist(self, playlist_id):
    """Fetch playlist metadata via the API and update its DB row.

    On success, updates the playlist163 row's playCount, shareCount,
    commentCount, description and comma-joined tags, and returns the
    raw playlist dict.  On failure, logs the error, marks the row
    done='E' and commits; returns None in that case.

    NOTE(review): the success path updates the row but never commits —
    presumably the caller commits; confirm, otherwise the counters are
    lost on session teardown.
    """
    url = uapi.playlist_api.format(playlist_id)
    try:
        data = tools.curl(url, self.__headers)
        playlist = data['result']
        self.session.query(pysql.Playlist163).\
            filter(pysql.Playlist163.link == playlist_id).\
            update({"playCount": playlist["playCount"],
                    "shareCount": playlist["shareCount"],
                    "commentCount": playlist["commentCount"],
                    "description": playlist["description"],
                    "tags": ",".join(playlist["tags"])})
        return playlist
    except Exception as e:
        pylog.Log("抓取歌单页面存在问题:{} 歌单ID:{}".format(e, playlist_id))
        # Flag the row as errored so the crawler does not retry it forever.
        self.session.query(pysql.Playlist163).filter(
            pysql.Playlist163.link == playlist_id).update({'done': 'E'})
        self.session.commit()
def auto_view(self, count=1):
    """Crawl comments for up to *count* unfinished songs (over == 'N').

    Small batches (count < 10) are processed in one query; larger
    counts are processed in chunks of 10 plus a final remainder query.
    Each song is handed to self.views_capture(song_id, 1, 1).  Errors
    are logged as ERROR 918.
    """
    try:
        if count < 10:
            msc = self.session.query(pysql.Music163).filter(
                pysql.Music163.over == "N").limit(count)
            for m in msc:
                self.views_capture(m.song_id, 1, 1)
        else:
            # BUG FIX: count / 10 is a float on Python 3 and breaks
            # range(); floor division is identical on Python 2 ints.
            for _ in range(count // 10):
                msc = self.session.query(pysql.Music163).filter(
                    pysql.Music163.over == "N").limit(10)
                for m in msc:
                    self.views_capture(m.song_id, 1, 1)
            msc = self.session.query(pysql.Music163).filter(
                pysql.Music163.over == "N").limit(count % 10)
            for m in msc:
                self.views_capture(m.song_id, 1, 1)
    # Narrowed from a bare except so KeyboardInterrupt is not swallowed.
    except Exception:
        pylog.Log("ERROR 918 : AUTO VIEW")