def view_capture(self, page, type="全部"):
    """Scrape one page of playlists for a style/category tag and persist new ones.

    Args:
        page: zero-based page index; each page of music.163.com holds 35 playlists.
        type: playlist style tag (default "全部" = "all"). The name shadows the
            builtin ``type`` but is kept for caller compatibility.

    Raises:
        Re-raises any scraping/parsing error after logging it.
    """
    play_url = self.__play_url.format(type, page * 35)
    try:
        # CSS class selectors used by music.163.com's playlist listing page.
        acmsk = {'class': 'msk'}
        scnb = {'class': 'nb'}
        dcu = {'class': 'u-cover u-cover-1'}
        ucm = {'class': 'm-cvrlst f-cb'}
        # Plain GET: the original bound a Session to `s` and then rebound `s`
        # to the soup object, leaking the session and shadowing it.
        resp = requests.get(play_url, headers=self.__headers)
        soup = BeautifulSoup(resp.content, "html.parser")
        lst = soup.find('ul', ucm)
        for play in lst.find_all('div', dcu):
            anchor = play.find('a', acmsk)
            # Normalize to text via tools.encode (as the newer crawler code in
            # this file does) BEFORE str.replace: the original called
            # .encode('utf-8') first, and bytes.replace(str, str) raises
            # TypeError on Python 3 (int(cnt) on bytes would fail too).
            title = tools.encode(anchor['title'])
            link = tools.encode(anchor['href']).replace("/playlist?id=", "")
            # '万' (10k) suffix is expanded so the count parses as an int.
            cnt = tools.encode(play.find('span', scnb).text).replace('万', '0000')
            # pysql.single(...) is True only when the link is not stored yet.
            if pysql.single("playlist163", "link", link) is True:
                pl = pysql.Playlist163(title=title, link=link, cnt=int(cnt),
                                       dsc="曲风:{}".format(type))
                self.session.add(pl)
                self.session.commit()
    except Exception as e:
        pylog.log.error("抓取歌单出现问题:{} 歌单类型:{} 页码:{}".format(e, type, page))
        raise
def view_links(self, song_id):
    """Crawl a song page and persist related songs and playlists it links to.

    Args:
        song_id: numeric ID of the seed song on music.163.com.

    Side effects:
        Adds new ``Music163`` and ``Playlist163`` rows to ``self.session``
        (flushed, not committed); rolls back and logs on any error.
    """
    url = "http://music.163.com/song?id=" + str(song_id)
    data = {'id': str(song_id)}
    headers = {
        'Cookie': 'MUSIC_U=e45797021db3403ab9fffb11c0f70a7994f71177b26efb5169b46948f2f9a60073d23a2665346106c9295f8f6dbb6c7731b299d667364ed3;'  # noqa
    }
    try:
        req = requests.get(url, headers=headers, data=data, timeout=100)
        sup = BeautifulSoup(req.content, "html.parser")
        # Related songs listed on the page.
        for link in sup.find_all('li', class_="f-cb"):
            html = link.find('a', 's-fc1')
            # `is not None` instead of `!= None` (PEP 8 identity comparison).
            if html is not None:
                # tools.encode normalizes to text; the original's
                # .encode('utf-8') produced bytes, breaking str handling
                # downstream on Python 3.
                title = tools.encode(html.get('title'))
                # Use a distinct local: the original reassigned `song_id`,
                # so the except-branch logged the wrong ID.
                linked_id = html.get('href')[9:]
                author = tools.encode(link.find('div', 'f-thide s-fc4')
                                      .find('span').get('title'))
                if pysql.single("music163", "song_id", linked_id) is True:
                    self.session.add(pysql.Music163(song_id=linked_id,
                                                    song_name=title,
                                                    author=author))
                    self.session.flush()
        # Related playlists listed on the page.
        for link in sup.find_all('a', 'sname f-fs1 s-fc0'):
            play_link = link.get("href").replace("/playlist?id=", "")
            play_name = tools.encode(link.get("title"))
            if pysql.single("playlist163", "link", play_link) is True:
                self.session.add(pysql.Playlist163(title=play_name,
                                                   link=play_link,
                                                   cnt=-1))
                self.session.flush()
    # Narrow the original bare `except:` and log through the module logger
    # (pylog.log.error) as the rest of this file does; `pylog.Log` is not
    # the pattern used anywhere else here.
    except Exception as e:
        self.session.rollback()
        pylog.log.error("解析页面推荐时出现问题:{} 歌曲ID:{}".format(e, song_id))
def view_links(self, song_id):
    """Crawl a song page and record the related songs and playlists it links to.

    Args:
        song_id: numeric ID of the seed song on music.163.com.

    Side effects:
        Adds unseen ``Music163`` / ``Playlist163`` rows to ``self.session``
        (flushed, not committed). Errors are caught and logged, never raised.
    """
    url = "http://music.163.com/song?id=" + str(song_id)
    data = {'id': str(song_id)}
    headers = {
        'Cookie': 'MUSIC_U=e45797021db3403ab9fffb11c0f70a7994f71177b26efb5169b46948f2f9a60073d23a2665346106c9295f8f6dbb6c7731b299d667364ed3;'  # noqa
    }
    try:
        response = requests.get(url, headers=headers, data=data, timeout=100)
        soup = BeautifulSoup(response.content, "html.parser")

        # Pass 1: related songs (each in an <li class="f-cb"> item).
        for item in soup.find_all('li', class_="f-cb"):
            anchor = item.find('a', 's-fc1')
            if anchor is None:
                continue
            title = tools.encode(anchor.get('title'))
            # NOTE(review): this intentionally reuses the `song_id` name, so
            # the except-branch below logs the last song seen — confirm that
            # is the intended behavior.
            song_id = anchor.get('href')[9:]
            author_span = item.find('div', 'f-thide s-fc4').find('span')
            author = tools.encode(author_span.get('title'))
            if pysql.single("music163", "song_id", song_id) is True:
                record = pysql.Music163(song_id=song_id,
                                        song_name=title,
                                        author=author)
                self.session.add(record)
                self.session.flush()

        # Pass 2: related playlists (anchors with the sname classes).
        for item in soup.find_all('a', 'sname f-fs1 s-fc0'):
            play_link = item.get("href").replace("/playlist?id=", "")
            play_name = tools.encode(item.get("title"))
            if pysql.single("playlist163", "link", play_link) is True:
                playlist = pysql.Playlist163(title=play_name,
                                             link=play_link,
                                             cnt=-1,
                                             dsc="来源:热评")
                self.session.add(playlist)
                self.session.flush()
    except Exception as e:
        pylog.log.error("解析页面推荐时出现问题:{} 歌曲ID:{}".format(e, song_id))
def view_capture(self, page, type="全部"):
    """Scrape one page of playlists for a style tag; persist new ones.

    Args:
        page: zero-based page index; each page holds 35 playlists.
        type: playlist style tag (default "全部"). Shadows the builtin
            ``type`` but is kept for caller compatibility.

    Returns:
        List of playlist titles scraped from the page.

    Raises:
        Re-raises any scraping/parsing error after logging it.
    """
    play_url = self.__play_url.format(type, page * 35)
    titles = []
    try:
        # CSS class selectors for music.163.com's playlist listing page.
        sel_mask = {'class': 'msk'}
        sel_count = {'class': 'nb'}
        sel_cover = {'class': 'u-cover u-cover-1'}
        sel_list = {'class': 'm-cvrlst f-cb'}
        page_html = tools.curl(play_url, self.__headers, type=const.RETURE_HTML)
        container = page_html.find('ul', sel_list)
        for cover in container.find_all('div', sel_cover):
            anchor = cover.find('a', sel_mask)
            title = tools.encode(anchor['title'])
            link = tools.encode(anchor['href']).replace("/playlist?id=", "")
            # Expand the '万' (10k) suffix so the play count parses as int.
            cnt = tools.encode(cover.find('span', sel_count).text).replace('万', '0000')
            # pysql.single(...) is True only for links not stored yet.
            if pysql.single("playlist163", "link", link) is True:
                record = pysql.Playlist163(title=title,
                                           link=link,
                                           cnt=int(cnt),
                                           dsc="曲风:{}".format(type))
                self.session.add(record)
                self.session.commit()
            titles.append(title)
        return titles
    except Exception as e:
        pylog.log.error("抓取歌单出现问题:{} 歌单类型:{} 页码:{}".format(e, type, page))
        raise