Example #1
    def parser(self, html_cont):
        '''
        :param html_cont: raw HTML content of the page to parse
        :return: list of FileInfo objects accumulated in self.res_data
        '''
        if html_cont is None:
            return None
        soup = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')
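        # the 'hd' div holds the page heading, reused as the subject of every item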
        subject = soup.find('div', class_="hd").get_text()
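        # each 'J-media-item' anchor points at one downloadable media entry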
        links = soup.find_all('a', class_='J-media-item')
        html_down = HtmlDownloader()
        for link in links:
            fileinfo = FileInfo()
            # strip() removes leading and trailing whitespace
            fileinfo.subject = subject.strip()
            # normalize the filename: replace ':', drop line breaks, the
            # '开始学习' ("start learning") label and spaces
            fileinfo.filename = link.get_text().strip().replace(
                ':', '_').replace("\r\n", "").replace(u'开始学习',
                                                      "").replace(' ', '')
            # the media id is the third path segment of the link's href
            fileinfo.mid = link['href'].split('/')[2]
            # fetch the media info JSON; parsed with json.loads instead of eval
            # (safer; assumes `import json` at module level)
            json_str = html_down.download(conf.DOWNLOAD_URL.format(
                fileinfo.mid)).decode('utf-8').replace('\/', '/')
            try:
                dic_json = json.loads(json_str)
            except ValueError as err:
                print('failed to parse media info for %s: %s' % (fileinfo.mid, err))
                continue

            # mpath holds the media URLs, stored here as low / medium / high quality
            fileinfo.url['L'] = dic_json['data']['result']['mpath'][0]
            fileinfo.url['M'] = dic_json['data']['result']['mpath'][1]
            fileinfo.url['H'] = dic_json['data']['result']['mpath'][2]
            # append the parsed entry to the result list
            self.res_data.append(fileinfo)

        return self.res_data
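
A minimal usage sketch, assuming the method above belongs to an HtmlParser class that also defines a res_data list, and that HtmlDownloader and the conf module exist as used in the snippet; the HtmlParser name and conf.START_URL are hypothetical here:

# hypothetical driver for the parser above; see the assumptions noted in the comments
downloader = HtmlDownloader()
html_parser = HtmlParser()                          # assumed owner class of parser()
html_cont = downloader.download(conf.START_URL)     # conf.START_URL is hypothetical
file_infos = html_parser.parser(html_cont) or []    # parser() returns None on empty input
for info in file_infos:
    print(info.subject, info.filename, info.url['H'])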