def view_capture(self, link):
    """Fetch the playlist page for *link* and persist any new songs.

    Flags the playlist row as processed, fetches the playlist endpoint,
    parses its JSON payload and inserts every track that is not already
    stored in the ``music163`` table.

    :param link: playlist id, appended to the crawler's base URL
    :raises: re-raises any fetch/parse error after logging it
    """
    # NOTE(review): the row is flagged 'over' *before* the fetch, so a failed
    # crawl still leaves it marked processed — kept as-is for compatibility.
    self.session.query(pysql.Playlist163).filter(
        pysql.Playlist163.link == link).update({'over': 'Y'})
    url = self.__url + str(link)
    http = requests.session()
    try:
        # The endpoint returns JSON; BeautifulSoup is only used to pull the
        # raw text out of the response body.
        soup = BeautifulSoup(
            http.get(url, headers=self.__headers).content, "html.parser")
        musics = json.loads(soup.text)['result']['tracks']
        exist = 0
        for music in musics:
            name = music['name'].encode('utf-8')
            author = music['artists'][0]['name'].encode('utf-8')
            # pysql.single returns True only when the song is not stored yet.
            if pysql.single("music163", "song_id", (music['id'])) is True:
                self.session.add(pysql.Music163(
                    song_id=music['id'], song_name=name, author=author))
                self.session.commit()
                exist = exist + 1
            else:
                pylog.log.info('{} : {} {}'.format("重复抓取歌曲", name, "取消持久化"))
        print("歌单包含歌曲 {} 首,数据库 merge 歌曲 {} 首 \r\n".format(
            len(musics), exist))
    except Exception:
        pylog.log.error('{} : {}'.format("抓取歌单页面存在问题", url))
        raise
def view_links(self, song_id):
    """Scrape the song page for *song_id* and persist related items.

    Related songs are taken from ``li.f-cb`` entries and containing
    playlists from ``a.sname`` anchors; rows not yet stored are flushed
    into the session, and the whole batch is rolled back on failure.

    :param song_id: numeric id of the song page to scrape
    """
    url = "http://music.163.com/song?id=" + str(song_id)
    data = {'id': str(song_id)}
    headers = {
        'Cookie': 'MUSIC_U=e45797021db3403ab9fffb11c0f70a7994f71177b26efb5169b46948f2f9a60073d23a2665346106c9295f8f6dbb6c7731b299d667364ed3;'
    }
    try:
        req = requests.get(url, headers=headers, data=data, timeout=100)
        sup = BeautifulSoup(req.content, "html.parser")
        for link in sup.find_all('li', class_="f-cb"):
            html = link.find('a', 's-fc1')
            if html is not None:
                title = html.get('title').encode('utf-8')
                # Strip the "/song?id=" prefix to get the raw id.  Use a new
                # name instead of shadowing the song_id parameter, so the
                # error log below reports the id that was actually requested.
                related_id = html.get('href')[9:]
                author = link.find('div', 'f-thide s-fc4').find(
                    'span').get('title').encode('utf-8')
                if pysql.single("music163", "song_id", related_id) is True:
                    self.session.add(pysql.Music163(
                        song_id=related_id, song_name=title, author=author))
                    self.session.flush()
        for link in sup.find_all('a', 'sname f-fs1 s-fc0'):
            play_link = link.get("href").replace("/playlist?id=", "")
            play_name = link.get("title").encode('utf-8')
            if pysql.single("playlist163", "link", play_link) is True:
                self.session.add(pysql.Playlist163(
                    title=play_name, link=play_link, cnt=-1))
                self.session.flush()
    except Exception:
        # Narrowed from a bare except: keep KeyboardInterrupt/SystemExit
        # deliverable instead of silently swallowing them.
        self.session.rollback()
        pylog.Log("ERROR 917 : VIEW LINK SONG_ID-" + str(song_id))
def view_links(self, song_id):
    """Scrape the song page for *song_id* and persist related items.

    Related songs are taken from ``li.f-cb`` entries and containing
    playlists from ``a.sname`` anchors; rows not yet stored are flushed
    into the session.  On failure the session is rolled back and the
    error is logged (best effort, nothing is raised).

    :param song_id: numeric id of the song page to scrape
    """
    url = "http://music.163.com/song?id=" + str(song_id)
    data = {'id': str(song_id)}
    headers = {
        'Cookie': 'MUSIC_U=e45797021db3403ab9fffb11c0f70a7994f71177b26efb5169b46948f2f9a60073d23a2665346106c9295f8f6dbb6c7731b299d667364ed3;'  # noqa
    }
    try:
        req = requests.get(url, headers=headers, data=data, timeout=100)
        sup = BeautifulSoup(req.content, "html.parser")
        for link in sup.find_all('li', class_="f-cb"):
            html = link.find('a', 's-fc1')
            if html is not None:
                title = tools.encode(html.get('title'))
                # Strip the "/song?id=" prefix to get the raw id.  Use a new
                # name instead of shadowing the song_id parameter, so the
                # error log below reports the id that was actually requested.
                related_id = html.get('href')[9:]
                author = tools.encode(link.find(
                    'div', 'f-thide s-fc4'
                ).find('span').get('title'))
                if pysql.single("music163", "song_id", related_id) is True:
                    self.session.add(pysql.Music163(
                        song_id=related_id, song_name=title, author=author
                    ))
                    self.session.flush()
        for link in sup.find_all('a', 'sname f-fs1 s-fc0'):
            play_link = link.get("href").replace("/playlist?id=", "")
            play_name = tools.encode(link.get("title"))
            if pysql.single("playlist163", "link", play_link) is True:
                self.session.add(pysql.Playlist163(
                    title=play_name, link=play_link, cnt=-1, dsc="来源:热评"
                ))
                self.session.flush()
    except Exception as e:
        # Roll back so a failed flush doesn't leave the session dirty
        # (consistent with the older view_links variant in this file).
        self.session.rollback()
        pylog.log.error("解析页面推荐时出现问题:{} 歌曲ID:{}".format(e, song_id))
def view_capture(self, link):
    """Crawl playlist *link*, persist new songs and return them.

    On success the playlist row is marked ``done='Y'``; on failure it is
    marked ``done='E'`` and the error is re-raised.

    :param link: playlist id
    :returns: list of ``{"name", "author"}`` dicts for newly stored songs
    :raises: re-raises any crawl/parse error after recording the failure
    """
    url = self.__url + str(link)
    songs = []
    try:
        data = self.curl_playlist(link)
        musics = data['tracks']
        exist = 0
        for music in musics:
            name = tools.encode(music['name'])
            author = tools.encode(music['artists'][0]['name'])
            # bMusic may be absent; fall back to a zero duration.
            if music["bMusic"] is None:
                play_time = 0
            else:
                play_time = music["bMusic"]["playTime"]
            # pysql.single returns True only when the song is not stored yet.
            if pysql.single("music163", "song_id", (music['id'])) is True:
                self.session.add(pysql.Music163(
                    song_id=music['id'], song_name=name,
                    author=author, playTime=play_time))
                self.session.commit()
                exist = exist + 1
                songs.append({"name": name, "author": author})
            else:
                pylog.log.info('{} : {} {}'.format("重复抓取歌曲", name, "取消持久化"))
        print("歌单包含歌曲 {} 首,数据库 merge 歌曲 {} 首 \r\n".format(len(musics), exist))
        self.session.query(pysql.Playlist163).filter(
            pysql.Playlist163.link == link).update({'done': 'Y'})
        self.session.commit()
        return songs
    except Exception as e:
        pylog.log.error("抓取歌单页面存在问题:{} 歌单ID:{}".format(e, url))
        # BUG FIX: the failure status used to filter on the full URL
        # (base + id), which never matches a stored link, so done='E'
        # was never recorded.  Filter on the playlist id instead.
        self.session.query(pysql.Playlist163).filter(
            pysql.Playlist163.link == link).update({'done': 'E'})
        self.session.commit()
        raise
def view_capture(self, link):
    """Fetch playlist *link* and insert songs not stored yet.

    The playlist row is flagged ``over='Y'`` up front; failures are only
    logged (best effort), nothing is raised.

    :param link: playlist id, appended to the crawler's base URL
    """
    self.session.query(pysql.Playlist163).filter(
        pysql.Playlist163.link == link).update({'over': 'Y'})
    url = self.__url + str(link)
    http = requests.session()
    try:
        # The endpoint returns JSON; BeautifulSoup is only used to pull the
        # raw text out of the response body.
        soup = BeautifulSoup(
            http.get(url, headers=self.__headers).content, "html.parser")
        musics = json.loads(soup.text)['result']['tracks']
        for music in musics:
            name = music['name'].encode('utf-8')
            author = music['artists'][0]['name'].encode('utf-8')
            # pysql.single returns True only when the song is not stored yet.
            if pysql.single("music163", "song_id", (music['id'])) is True:
                self.session.add(pysql.Music163(
                    song_id=music['id'], song_name=name, author=author))
                self.session.commit()
            else:
                pylog.Log('{} : {} {}'.format("ERROR 103", name, "Not Single"))
    except Exception:
        # Narrowed from a bare except: keep KeyboardInterrupt/SystemExit
        # deliverable instead of silently swallowing them.
        pylog.Log('{} : {}'.format("Error 901", url))
def view_capture(self, link):
    """Crawl playlist *link*, persist new songs, and return them.

    For official chart playlists (ids present in ``uapi.top``) each track's
    chart position is also recorded in ``Toplist163``.  On success the
    playlist row is marked ``done='Y'``; on failure ``done='E'`` — both
    with a refreshed ``update_time``.

    :param link: playlist id
    :returns: list of ``{"name", "author"}`` dicts for newly stored songs
    """
    url = self.__url + str(link)
    songs = []
    try:
        data = self.curl_playlist(link)
        musics = data['tracks']
        exist = 0
        for music in musics:
            name = tools.encode(music['name'])
            authors = []
            for art in music['artists']:
                authors.append(tools.encode(art['name']))
            # bMusic may be absent; fall back to a zero duration.
            if music["bMusic"] is None:
                play_time = 0
            else:
                play_time = music["bMusic"]["playTime"]
            # pysql.single returns True only when the song is not stored yet.
            if pysql.single("music163", "song_id", (music['id'])) is True:
                self.session.add(
                    pysql.Music163(song_id=music['id'], song_name=name,
                                   author=",".join(authors),
                                   playTime=play_time))
                self.session.commit()
                exist = exist + 1
                songs.append({"name": name, "author": ",".join(authors)})
            else:
                pylog.log.info('{} : {} {}'.format("重复抓取歌曲", name, "取消持久化"))
            # 处理官方榜单 — record chart positions for official top lists.
            if int(link) in uapi.top.keys():
                updateTime = datetime.datetime.fromtimestamp(
                    data['updateTime'] / 1000).strftime("%Y-%m-%d %H:%M:%S")
                createTime = datetime.datetime.fromtimestamp(
                    data['createTime'] / 1000).strftime("%Y-%m-%d %H:%M:%S")
                position = music['position']
                # Sentinel rank when the track was absent from the last chart.
                lastrank = 100000000
                with tools.ignored(Exception):
                    lastrank = music['lastRank']
                # Already recorded for this chart snapshot?
                cnt = self.session.query(pysql.Toplist163).filter(
                    pysql.Toplist163.update_time == updateTime,
                    pysql.Toplist163.song_id == music['id'],
                    pysql.Toplist163.playlist_id == link).count()
                # Ever mailed before?  If so, keep the 'Y' flag.
                mcnt = self.session.query(pysql.Toplist163).filter(
                    pysql.Toplist163.mailed == "Y",
                    pysql.Toplist163.song_id == music['id'],
                    pysql.Toplist163.playlist_id == link).count()
                if cnt == 0:
                    mailed = "N"
                    if mcnt > 0:
                        mailed = "Y"
                    self.session.add(
                        pysql.Toplist163(song_id=music['id'], song_name=name,
                                         author=",".join(authors),
                                         playTime=play_time,
                                         position=position, playlist_id=link,
                                         lastRank=lastrank, mailed=mailed,
                                         create_time=createTime,
                                         update_time=updateTime))
                    self.session.commit()
        print("歌单包含歌曲 {} 首,数据库 merge 歌曲 {} 首 \r\n".format(
            len(musics), exist))
        # BUG FIX: format was "%H:%S:%M" (minutes/seconds swapped), which
        # disagreed with the "%H:%M:%S" format used for chart timestamps.
        self.session.query(pysql.Playlist163).filter(
            pysql.Playlist163.link == link).update({
                'done': 'Y',
                'update_time': datetime.datetime.now().strftime(
                    "%Y-%m-%d %H:%M:%S")
            })
        self.session.commit()
        return songs
    except Exception as e:
        pylog.log.error("抓取歌单页面存在问题:{} 歌单ID:{}".format(e, url))
        # BUG FIX: failure status used to filter on the full URL (base + id),
        # which never matches a stored link; filter on the playlist id.
        # Also fixes the swapped "%H:%S:%M" timestamp format.
        self.session.query(pysql.Playlist163).filter(
            pysql.Playlist163.link == link).update({
                'done': 'E',
                'update_time': datetime.datetime.now().strftime(
                    "%Y-%m-%d %H:%M:%S")
            })
        self.session.commit()