示例#1
0
 def insertDb(self,index,results):
   
     conn = MySQLdb.connect(host='localhost',user='******',passwd='',port=3306,charset='utf8')
     cur = conn.cursor()
     conn.select_db('newslab')
     if index == 1:
         print '[+] 网易科技新闻...数据插入:',len(results)
         for result in results: 
             coverurl = Toolkit.getImageUrl(result[0]);       
             title = result[2]
             summary = result[4]
             source,timeStr =  result[3].split(' ',1)
             website = result[1].strip('\"')
             md5Str = hashlib.md5(website).hexdigest()
             try:
                 cur.execute("insert ignore into news(title,summary,coverurl,time,source,website,md5) values('%s','%s','%s','%s','%s','%s','%s')" % (title,summary,coverurl,timeStr,source,website,md5Str))
             except Exception,e:
                 print "[-] "+website+"插入失败",e
示例#2
0
     # Fragment: the enclosing function header and the branch condition that
     # guards this first loop are outside the visible snippet, as is the code
     # that creates `conn` and `cur`.
     # Each record is read as: [0] website, [1] title (whitespace-stripped),
     # [2] passed to Toolkit.getImageUrl, [3] passed to Toolkit.filterHtmlTag
     # and stripped, [4] time string.
     print '[+] 网易国内新闻...数据插入:',len(results)
     for result in results:               
         title = result[1].strip()
         coverurl = Toolkit.getImageUrl(result[2])
         summary = Toolkit.filterHtmlTag(result[3]).strip()
         source = "网易国内新闻"
         timeStr = result[4]
         website = result[0]
         # Hex MD5 of the website value, stored in the md5 column.
         md5Str = hashlib.md5(website).hexdigest()
         try:
             # NOTE(review): the statement is built by string interpolation, so
             # a quote inside any value breaks it (and allows SQL injection);
             # passing the values as execute() parameters would avoid that.
             cur.execute("insert ignore into news_domestic(title,summary,coverurl,time,source,website,md5) values('%s','%s','%s','%s','%s','%s','%s')" % (title,summary,coverurl,timeStr,source,website,md5Str))
         except Exception,e:
             # Best effort: report the failing record and continue the loop.
             print "[-] "+website+"插入失败",e
 elif index == 3:
      # Same record layout as the loop above; rows go to news_social with a
      # different fixed source label.
      print '[+] 网易社会新闻...数据插入:',len(results)
      for result in results:               
         title = result[1].strip()
         coverurl = Toolkit.getImageUrl(result[2])
         summary = Toolkit.filterHtmlTag(result[3]).strip()
         source = "网易社会新闻"
         timeStr = result[4]
         website = result[0]
         md5Str = hashlib.md5(website).hexdigest()
         try:
             # NOTE(review): same interpolated-SQL concern as the insert above.
             cur.execute("insert ignore into news_social(title,summary,coverurl,time,source,website,md5) values('%s','%s','%s','%s','%s','%s','%s')" % (title,summary,coverurl,timeStr,source,website,md5Str))
         except Exception,e:
             print "[-] "+website+"插入失败",e
 # Commit whatever was inserted, then release the cursor and the connection.
 # These calls are skipped if an exception escapes the code above (there is no
 # try/finally in the visible part).
 conn.commit();
 cur.close();
 conn.close()  
 # NOTE(review): this closing message names the tech-news category although
 # the visible branches insert domestic and social news -- confirm the wording.
 print '[+] 网易科技新闻...结束'      
示例#3
0
        # Fragment: the function header, the branch condition, the `for` header
        # and the assignment of `title` for this first loop body are outside
        # the visible snippet, as is the code that creates `conn` and `cur`.
        # Fields read here: [0] passed to Toolkit.getImageUrl (result wrapped
        # in str()), [1] path appended to the news.qq.com prefix, [3] passed to
        # Toolkit.filterHtmlTag.
        summary = Toolkit.filterHtmlTag(result[3])
        coverurl =  str(Toolkit.getImageUrl(result[0]))
        source = u'腾讯国内'
        website = "http://news.qq.com"+result[1]
        # Hex MD5 of the full website URL, stored in the md5 column.
        md5Str = hashlib.md5(website).hexdigest()
        try:
            # NOTE(review): the statement is built by string interpolation, so
            # a quote inside any value breaks it (and allows SQL injection);
            # passing the values as execute() parameters would avoid that.
            cur.execute("insert ignore into news_domestic(title,summary,coverurl,source,website,md5) values('%s','%s','%s','%s','%s','%s')" % (title,summary,coverurl,source,website,md5Str))
        except Exception,e:
            # Best effort: report the failing record and continue the loop.
            print "[-] "+website+" "+title+summary+" 插入失败",e
      print '[+] 腾讯国内新闻...插入结束'
 elif index == 3:
      # Same record handling as above ([2] is the title); rows go to
      # news_social.
      print '[+] 腾讯社会新闻...开始插入:',len(results)
      for result in results:
        title = result[2]
        summary = Toolkit.filterHtmlTag(result[3])
        coverurl =  str(Toolkit.getImageUrl(result[0]))
        # NOTE(review): this branch inserts into news_social but uses the same
        # source label as the news_domestic branch above -- looks like a
        # copy-paste leftover; confirm the intended label.
        source = u'腾讯国内'
        website = "http://news.qq.com"+result[1]
        md5Str = hashlib.md5(website).hexdigest()
        try:
            # NOTE(review): same interpolated-SQL concern as the insert above.
            cur.execute("insert ignore into news_social(title,summary,coverurl,source,website,md5) values('%s','%s','%s','%s','%s','%s')" % (title,summary,coverurl,source,website,md5Str))
        except Exception,e:
            print "[-] "+website+" "+title+summary+" 插入失败",e
      print '[+] 腾讯社会新闻...插入结束'   
         
 # Commit whatever was inserted, then release the cursor and the connection.
 # These calls are skipped if an exception escapes the code above (there is no
 # try/finally in the visible part).
 conn.commit();
 cur.close();
 conn.close() 
 print '[+] 腾讯新闻...结束'