示例#1
0
文件: dmzj.py 项目: fishcg/node-web
 def getImages(self, url, title):
     """Scrape the centred images of a page into a new topic.

     Fetches ``url`` with ``self.get``, collects every
     ``<p style="text-align:center">`` element and, when at least one is
     found, inserts a ``lab_topic`` row, downloads each paragraph's image
     through ``self.downloadImage`` and bulk-inserts the matching
     ``lab_image`` rows.

     Args:
         url: Address of the page to scrape.
         title: Title stored on the new topic row.

     Returns:
         None. Any exception is printed and swallowed (best effort).
     """
     try:
         r = self.get(url)
         soup = BeautifulSoup(r.text, 'html.parser')
         p_images = soup.find_all('p', style='text-align:center')
         if not p_images:
             return

         # Create the topic the images will belong to.
         now = int(time.time())
         # Double single quotes so scraped text cannot terminate the SQL
         # string literal early.
         # NOTE(review): a parameterized query would be safer if the Mysql
         # helper supports one -- its API is not visible here.
         safe_title = str(title).replace("'", "''")
         topic_id = Mysql.create("INSERT INTO lab_topic (title, create_time, update_time) VALUES ('%s', %s, %s)" % (safe_title, now, now))

         # Loop invariants: the id pattern and the dated sub-directory.
         # ``\d+`` (not ``\d*``) so an alt text such as "id=.jpg" falls
         # back to 0 instead of crashing on int('').
         id_pattern = re.compile(r'id=(\d+)\.', re.I)
         date = time.strftime('%Y%m%d', time.localtime(now))
         downloadPath = os.path.join(self.downloadPath, date)

         images_values = []
         for p_image in p_images:
             image_url = p_image.img['src']
             p_name = p_image.img['alt']
             p_object = id_pattern.search(p_name)
             p_id = int(p_object.group(1)) if p_object else 0
             # File extension of the remote image.
             etc = os.path.splitext(image_url)[1]
             # Hash date + millisecond timestamp + alt text to get a
             # stored file name.
             old_name = date + str(round(time.time() * 1000)) + p_name
             name = hashlib.md5(old_name.encode(encoding='UTF-8')).hexdigest() + etc
             self.downloadImage(image_url, downloadPath, name)
             save_path = (date + '/' + name).replace("'", "''")
             safe_name = p_name.replace("'", "''")
             images_values.append("('%s', %s, '%s', %s, %s, %s)" % (save_path, topic_id, safe_name, p_id, now, now))

         create_sql = 'INSERT INTO lab_image (url, topic_id, name, p_id, create_time, update_time) VALUES ' + (','.join(images_values))
         Mysql.execute(create_sql)
         print('\033[1;32m--------------------已创建:', title, '\033[0m')
     except Exception as e:
         # TODO: log
         print(e)
         return
示例#2
0
 def createNews(self, url):
     """Scrape the first listed article from ``url`` and store it as news.

     Args:
         url: Address of the listing page to fetch with ``self.get``.

     Returns:
         A JSON-formatted string ``{ "id": <new row id>, "url": "<article url>"}``
         on success; ``-2`` when the article link equals ``self.oldUrl`` or
         its catalog is '美图'; ``-1`` when any exception occurs.
     """
     try:
         page = self.get(url)
         markup = page.text
         document = BeautifulSoup(markup, 'html.parser')

         # Link element pointing at the article.
         anchor = document.find('a', class_='dec_img')
         viewUrl = str(anchor['href'])
         if self.oldUrl == viewUrl:
             return -2

         image = str(anchor.img['src'])
         # Single quotes are doubled before the text goes into the SQL below.
         subject = str(anchor['title']).replace("'", "''")
         container = anchor.parent.parent
         summary_tag = container.find('p', class_='com_about')
         intro = summary_tag.get_text().replace("'", "''")
         catalogName = container.find('span', class_='bq_ico').get_text()
         if catalogName == '美图':
             # Entries of the '美图' catalog are not collected.
             return -2

         category_id = self.getCatalog(catalogName)
         content = self.getNewsView(viewUrl)
         now = int(time.time())
         # NOTE(review): content and image are interpolated unescaped --
         # confirm whether getNewsView already escapes its result.
         row = (1, '网络', category_id, subject, intro, content, image, now, now)
         sql = "INSERT INTO news (user_id, author, catalog_id, title, intro, content, cover, ctime, utime) VALUES (%s, '%s', %s, '%s', '%s', '%s', '%s', %s, %s)" % row
         newsID = Mysql.create(sql)
         return ''.join(['{ "id": ', str(newsID), ', "url": "', viewUrl, '"}'])
     except Exception as e:
         # TODO: log
         # print(e)
         return -1