示例#1
0
class wangyiyun():
    """Crawl the pages listed in ``table_singer`` and store their songs.

    Each selected page is loaded in headless Firefox, the markup inside the
    ``contentFrame`` iframe is parsed with lxml, and one row per song is
    written through ``MySQLCommand.insert_musicData``.
    """

    # Site root prepended to the relative song links scraped from the page.
    _BASE_URL = 'https://music.163.com'

    def __init__(self):
        """Start a headless Firefox session and connect to the database."""
        options = Options()
        options.headless = True
        self.driver = webdriver.Firefox(options=options)
        # Connect to the database.
        self.mysqlCommand = MySQLCommand()
        self.mysqlCommand.connectdb()
        # Not used anywhere else in this class; kept so the attribute stays
        # available to external code.
        self.music = {}

    @classmethod
    def _build_records(cls, song_names, song_hrefs, albums, singer):
        """Combine the parallel scrape results into insertable records.

        Args:
            song_names: Song titles, one per song.
            song_hrefs: Site-relative song links, in the same order.
            albums: Album titles, in the same order.
            singer: Singer name applied to every record.

        Returns:
            A list of ``(song_name, song_url, album, singer)`` tuples. The
            lists are walked in lockstep, so a list that came back shorter
            than the others truncates the result instead of raising
            ``IndexError``.
        """
        return [
            (
                # lxml returns already-decoded text, so a non-breaking space
                # is the single character U+00A0, not the text "\xa0".
                name.replace('\xa0', ' '),
                cls._BASE_URL + href,
                album.replace('\xa0', ' '),
                singer,
            )
            for name, href, album in zip(song_names, song_hrefs, albums)
        ]

    def run(self):
        """Crawl every second row of ``table_singer`` and insert its songs.

        Rows whose ``url`` is ``None`` are skipped. A failed insert is
        printed and the crawl continues with the next song.
        """
        self.mysqlCommand.cursor.execute(
            "select url ,singer_name from table_singer")
        rows = self.mysqlCommand.cursor.fetchall()

        for position, row in enumerate(rows):
            page_url = row.get('url')
            # Only rows at even positions are crawled, and a row without a
            # URL cannot be loaded at all.
            if page_url is None or position % 2 != 0:
                continue

            self.driver.get(page_url)
            # Fixed pauses around the frame switch, presumably to let the
            # page finish rendering and to pace the requests.
            time.sleep(4)
            self.driver.switch_to.frame(
                self.driver.find_element_by_name('contentFrame'))
            time.sleep(1)
            html = etree.HTML(self.driver.page_source)
            time.sleep(1)

            song_name = html.xpath(
                "//div[@class='j-flag']//div[@class='ttc']/span[@class='txt']/a/b/@title"
            )
            song_url = html.xpath(
                "//div[@class='j-flag']//div[@class='ttc']/span[@class='txt']/a/@href"
            )
            album = html.xpath("//div[@class='text']/a/@title")

            records = self._build_records(
                song_name, song_url, album, row.get('singer_name'))

            for song_n, song_u, albums, singer in records:
                print(song_n, '+', song_u, '+', albums, '+', singer)
                try:
                    self.mysqlCommand.insert_musicData(
                        song_n, song_u, albums, singer)
                except Exception as e:
                    # Best effort: report the failure and keep crawling.
                    print(e)
                print('==' * 20)
示例#2
0
class wangyiyun():
    """Crawl the playlist pages listed in ``song_list`` and store their songs.

    Each selected page is loaded in headless Firefox. Song titles and links
    are pulled from the raw markup of the ``contentFrame`` iframe with
    regular expressions; album, singer and singer link come from lxml XPath
    queries. Every song is written through ``MySQLCommand.insert_musicData``
    and its singer through ``MySQLCommand.insert_singer``.
    """

    # Site root prepended to the relative links scraped from the page.
    _BASE_URL = 'https://music.163.com'

    def __init__(self):
        """Start a headless Firefox session and connect to the database."""
        options = Options()
        options.headless = True
        self.driver = webdriver.Firefox(options=options)
        # Connect to the database.
        self.mysqlCommand = MySQLCommand()
        self.mysqlCommand.connectdb()
        # Not used anywhere else in this class; kept so the attribute stays
        # available to external code.
        self.music = {}

    @classmethod
    def _build_records(cls, song_names, song_hrefs, albums, singers,
                       singer_hrefs):
        """Combine the parallel scrape results into insertable records.

        Args:
            song_names: Song titles taken from the raw markup; a literal
                ``&nbsp;`` entity in a title is replaced with a space.
            song_hrefs: Site-relative song links, in the same order.
            albums: Album titles, in the same order.
            singers: Singer names, in the same order.
            singer_hrefs: Site-relative singer links, in the same order.

        Returns:
            A list of ``(song_name, song_url, album, singer, singer_url)``
            tuples. The lists are walked in lockstep, so a list that came
            back shorter than the others truncates the result instead of
            raising ``IndexError``.
        """
        return [
            (
                name.replace('&nbsp;', ' '),
                cls._BASE_URL + href,
                album,
                singer,
                cls._BASE_URL + singer_href,
            )
            for name, href, album, singer, singer_href in zip(
                song_names, song_hrefs, albums, singers, singer_hrefs)
        ]

    def run(self):
        """Crawl every third row of ``song_list`` and insert its songs.

        Rows whose ``list_url`` is ``None`` are skipped. A failed insert is
        printed and the crawl continues with the next song.
        """
        self.mysqlCommand.cursor.execute("select list_url from song_list")
        list_song = self.mysqlCommand.cursor.fetchall()

        for position, row in enumerate(list_song):
            page_url = row.get('list_url')
            # Only every third row is crawled, and a row without a URL
            # cannot be loaded at all.
            if page_url is None or position % 3 != 0:
                continue

            self.driver.get(page_url)
            # Fixed pauses around the frame switch, presumably to let the
            # page finish rendering and to pace the requests.
            time.sleep(4)
            self.driver.switch_to.frame(
                self.driver.find_element_by_name('contentFrame'))
            time.sleep(1)
            source = self.driver.page_source
            html = etree.HTML(source)
            time.sleep(1)

            song_name = re.findall(r'"><b title="(.*?)">', source,
                                   re.DOTALL)
            song_url = re.findall(
                r'<div class="ttc"><span class="txt"><a href="(.*?)"><b',
                source, re.DOTALL)
            album = html.xpath("//div[@class='text']/a/@title")
            singer = html.xpath("//div[@class='text']/@title")
            # NOTE(review): this query can return more than one link per
            # song row if a row lists several artists, which would shift
            # the pairing with `singer` -- confirm against the page markup.
            singer_url = html.xpath("//div[@class='text']/span/a/@href")

            records = self._build_records(
                song_name, song_url, album, singer, singer_url)

            for song_n, song_u, album_title, singer_name, singerurl in records:
                print(singer_name, singerurl)
                try:
                    self.mysqlCommand.insert_musicData(
                        song_n, song_u, album_title, singer_name)
                    self.mysqlCommand.insert_singer(singer_name, singerurl)
                    print('==' * 20)
                except Exception as e:
                    # Best effort: report the failure and keep crawling.
                    print(e)
示例#3
0
class wangyiyun():
    """Crawl a fixed set of NetEase Cloud Music charts into MySQL.

    Every chart URL in ``self.url`` is loaded in headless Firefox. The chart
    itself is recorded through ``MySQLCommand.insert_list`` and each song on
    it through ``MySQLCommand.insert_musicData``.
    """

    # Site root prepended to the relative song links scraped from the page.
    _BASE_URL = 'https://music.163.com'

    def __init__(self):
        """Start a headless Firefox session and connect to the database."""
        options = Options()
        options.headless = True
        self.driver = webdriver.Firefox(options=options)
        # Chart pages to crawl.
        self.url = [
            'https://music.163.com/#/discover/toplist?id=19723756',
            'https://music.163.com/#/discover/toplist?id=3779629',
            'https://music.163.com/#/discover/toplist?id=2884035',
            'https://music.163.com/#/discover/toplist?id=3778678',
            'https://music.163.com/#/discover/toplist?id=991319590',
            'https://music.163.com/#/discover/toplist?id=71384707',
            'https://music.163.com/#/discover/toplist?id=1978921795',
            'https://music.163.com/#/discover/toplist?id=2250011882',
            'https://music.163.com/#/discover/toplist?id=2617766278',
            'https://music.163.com/#/discover/toplist?id=71385702',
            'https://music.163.com/#/discover/toplist?id=745956260',
            'https://music.163.com/#/discover/toplist?id=10520166',
            'https://music.163.com/#/discover/toplist?id=2023401535',
            'https://music.163.com/#/discover/toplist?id=2006508653',
            'https://music.163.com/#/discover/toplist?id=180106',
            'https://music.163.com/#/discover/toplist?id=60198',
            'https://music.163.com/#/discover/toplist?id=3812895',
            'https://music.163.com/#/discover/toplist?id=27135204',
            'https://music.163.com/#/discover/toplist?id=21845217',
            'https://music.163.com/#/discover/toplist?id=11641012',
            'https://music.163.com/#/discover/toplist?id=60131',
            'https://music.163.com/#/discover/toplist?id=120001',
            'https://music.163.com/#/discover/toplist?id=112463',
            'https://music.163.com/#/discover/toplist?id=10169002',
            'https://music.163.com/#/discover/toplist?id=2809513713',
            'https://music.163.com/#/discover/toplist?id=2809577409'
        ]
        # Connect to the database.
        self.mysqlCommand = MySQLCommand()
        self.mysqlCommand.connectdb()

    @classmethod
    def _build_records(cls, song_hrefs, song_names, singers):
        """Combine the parallel scrape results into insertable records.

        Args:
            song_hrefs: Site-relative song links taken from the raw markup.
            song_names: Song titles, in the same order; a literal ``&nbsp;``
                entity in a title is replaced with a space.
            singers: Singer names, in the same order.

        Returns:
            A list of ``(song_name, song_url, singer)`` tuples. The lists
            are walked in lockstep, so a list that came back shorter than
            the others truncates the result instead of raising
            ``IndexError``.
        """
        return [
            (name.replace('&nbsp;', ' '), cls._BASE_URL + href, singer)
            for href, name, singer in zip(song_hrefs, song_names, singers)
        ]

    @staticmethod
    def _strip_sql_breakers(text):
        """Return ``text`` without ``'`` and ``)`` characters.

        Used for the retry after a failed insert.
        NOTE(review): presumably ``insert_musicData`` interpolates values
        into the SQL text, which is why these characters break it; a
        parameterized query would make this retry unnecessary -- confirm
        against ``MySQLCommand``.
        """
        return ''.join(ch for ch in text if ch != '\'' and ch != ')')

    def _insert_song(self, song_name, song_url, album, singer):
        """Insert one song, retrying once with a sanitized title."""
        try:
            self.mysqlCommand.insert_musicData(song_name, song_url, album,
                                               singer)
        except Exception:
            safe_name = self._strip_sql_breakers(song_name)
            try:
                self.mysqlCommand.insert_musicData(
                    safe_name, song_url, album, singer)
            except Exception as e:
                print('歌曲错误 ' + safe_name, '歌手 ' + singer,
                      '原名 ' + song_name)
                print(e)

    def _crawl_chart(self, s_url):
        """Load one chart page, record the chart, then record its songs."""
        self.driver.get(s_url)
        # Fixed pause after navigation, presumably to let the page render.
        time.sleep(4)
        self.driver.switch_to.frame(
            self.driver.find_element_by_name("contentFrame"))
        source = self.driver.page_source
        html = etree.HTML(source)

        # Both queries return lists of text nodes and are handed to
        # insert_list as lists.
        # NOTE(review): confirm insert_list expects lists here rather than
        # the first element.
        list_name = html.xpath("//div[@class='hd f-cb']/h2/text()")
        play_num = html.xpath(
            "//div[@class='more s-fc3']/strong[@class='s-fc6']/text()")
        creator = '网易云'
        creator_url = '无'
        try:
            self.mysqlCommand.insert_list(s_url, list_name, creator,
                                          creator_url, play_num)
        except Exception as e:
            # list_name is a list, so it is passed as its own print
            # argument; concatenating it onto the label would raise
            # TypeError inside this handler.
            print('列表错误', list_name, play_num, creator_url, creator)
            print(e)

        song_hrefs = re.findall(r'<span class="txt"><a href="(.*?)"><b',
                                source, re.DOTALL)
        song_names = re.findall(r'><b title="(.*?)">', source, re.DOTALL)
        singers = re.findall(r'div class="text" title="(.*?)"><span',
                             source, re.DOTALL)
        album = '网易云排行榜'
        for song_name, song_url, singer in self._build_records(
                song_hrefs, song_names, singers):
            self._insert_song(song_name, song_url, album, singer)

    def run(self):
        """Crawl every chart in ``self.url``, then release all resources.

        The database connection is closed and the browser session is quit
        even when a page fails, so an aborted crawl does not leave a
        connection or a Firefox process behind. The instance cannot be
        reused afterwards.
        """
        try:
            for s_url in self.url:
                self._crawl_chart(s_url)
        finally:
            try:
                self.mysqlCommand.closeMysql()
            finally:
                self.driver.quit()