def insertDb(self, index, results):
    """Insert scraped NetEase news rows into the ``newslab`` MySQL database.

    Rows are written with ``INSERT IGNORE`` using driver-side parameter
    binding, so quotes or SQL fragments in scraped text cannot alter the
    statement. A failing insert is reported on stdout and the remaining rows
    are still attempted; one commit is issued after the loop.

    Args:
        index: Feed selector. 1 writes to ``news``, 2 to ``news_domestic``
            and 3 to ``news_social``. Any other value inserts nothing (the
            connection is still opened, committed and closed).
        results: Sized iterable of indexable rows. Field positions by feed:
            index 1 -> [0] input to ``Toolkit.getImageUrl``, [1] URL wrapped
            in double quotes, [2] title, [3] ``"<source> <time>"``,
            [4] summary;
            index 2/3 -> [0] URL, [1] title, [2] input to
            ``Toolkit.getImageUrl``, [3] input to ``Toolkit.filterHtmlTag``,
            [4] time.

    Raises:
        ValueError: For index 1, when ``result[3]`` contains no space and so
            cannot be split into source and time.
        Exception: Connection/``select_db``/commit errors and row-parsing
            errors propagate to the caller; the cursor and connection are
            closed first and nothing is committed.
    """
    # NOTE(review): the ``index == 2`` branch header was not present in the
    # source as received; it is reconstructed from the neighbouring
    # ``index == 1`` / ``index == 3`` branches -- confirm against the original.
    # index -> (progress banner, target table, fixed source label or None)
    feeds = {
        1: ('[+] 网易科技新闻...数据插入:', 'news', None),
        2: ('[+] 网易国内新闻...数据插入:', 'news_domestic', "网易国内新闻"),
        3: ('[+] 网易社会新闻...数据插入:', 'news_social', "网易社会新闻"),
    }

    def parse_row(result, fixed_source):
        """Return (title, summary, coverurl, time, source, website)."""
        if fixed_source is None:
            # Tech feed: source and time arrive together in one field.
            source, timeStr = result[3].split(' ', 1)
            return (result[2], result[4], Toolkit.getImageUrl(result[0]),
                    timeStr, source, result[1].strip('\"'))
        return (result[1].strip(), Toolkit.filterHtmlTag(result[3]).strip(),
                Toolkit.getImageUrl(result[2]), result[4], fixed_source,
                result[0])

    conn = MySQLdb.connect(host='localhost', user='******', passwd='',
                           port=3306, charset='utf8')
    try:
        cur = conn.cursor()
        try:
            conn.select_db('newslab')
            feed = feeds.get(index)
            if feed is not None:
                banner, table, fixed_source = feed
                print('%s %d' % (banner, len(results)))
                # The table name comes from the fixed mapping above, never
                # from input; every value is bound as a parameter.
                sql = ("insert ignore into " + table +
                       "(title,summary,coverurl,time,source,website,md5) "
                       "values(%s,%s,%s,%s,%s,%s,%s)")
                for result in results:
                    row = parse_row(result, fixed_source)
                    website = row[5]
                    # md5 needs bytes; encode text URLs explicitly so
                    # non-ASCII URLs hash instead of raising.
                    if isinstance(website, bytes):
                        url_bytes = website
                    else:
                        url_bytes = website.encode('utf-8')
                    md5Str = hashlib.md5(url_bytes).hexdigest()
                    try:
                        cur.execute(sql, row + (md5Str,))
                    except Exception as e:
                        # Deliberate best-effort: report the row and go on.
                        print('%s %s' % ("[-] " + website + "插入失败", e))
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
    # NOTE(review): this closing banner names the tech feed for every index.
    print('[+] 网易科技新闻...结束')
summary = Toolkit.filterHtmlTag(result[3]) coverurl = str(Toolkit.getImageUrl(result[0])) source = u'腾讯国内' website = "http://news.qq.com"+result[1] md5Str = hashlib.md5(website).hexdigest() try: cur.execute("insert ignore into news_domestic(title,summary,coverurl,source,website,md5) values('%s','%s','%s','%s','%s','%s')" % (title,summary,coverurl,source,website,md5Str)) except Exception,e: print "[-] "+website+" "+title+summary+" 插入失败",e print '[+] 腾讯国内新闻...插入结束' elif index == 3: print '[+] 腾讯社会新闻...开始插入:',len(results) for result in results: title = result[2] summary = Toolkit.filterHtmlTag(result[3]) coverurl = str(Toolkit.getImageUrl(result[0])) source = u'腾讯国内' website = "http://news.qq.com"+result[1] md5Str = hashlib.md5(website).hexdigest() try: cur.execute("insert ignore into news_social(title,summary,coverurl,source,website,md5) values('%s','%s','%s','%s','%s','%s')" % (title,summary,coverurl,source,website,md5Str)) except Exception,e: print "[-] "+website+" "+title+summary+" 插入失败",e print '[+] 腾讯社会新闻...插入结束' conn.commit(); cur.close(); conn.close() print '[+] 腾讯新闻...结束'