def weixinsearch(): urls = [] driver = webdriver.PhantomJS(desired_capabilities=dcap) # driver = webdriver.Firefox() url = urlall() print url driver.get(url) driver.implicitly_wait(5) try: driver.find_element_by_xpath("//div[@id='wxmore']/a").click() except: pass jxs = driver.find_element_by_xpath("//h3[@id='weixinname']").text urlse = driver.find_elements_by_xpath("//h4/a") for a in urlse: a1 = a.get_attribute("href") a1 = str(a1) urls.append(a1) cur = Mysql(host="10.6.2.121") cur.conDB() for uri in urls: driver.get(uri) time.sleep(random.randint(3, 5)) title = driver.find_element_by_xpath("//h2[@id='activity-name']").text date = driver.find_element_by_xpath("//em[@id='post-date']").text conten = driver.find_element_by_xpath("//div[@id='js_content']").text urlconten = driver.current_url print title, "经销商:", jxs, date, urlconten cur.selDB( "INSERT INTO weibo.WEIXIN(SCREEN_NAME,DATE_ISSUE,EVENT_TOPIC,EVENT_DESCR,EVENT_URL,SEARCHED_URL) VALUES ('%s','%s','%s','%s','%s','%s')" % (jxs, date, title, conten, urlconten, url)) cur.closeDB() driver.quit()
def selmysql():
    """Return per-10-minute insert counts for the last 100 minutes.

    Returns:
        A list of five lists of ten integers each, ordered from the most
        recent window (0-10 minutes ago) to the oldest (90-100 minutes ago):
        ``[news, bbs, total, sina, weixin]``, where ``total`` is the
        element-wise sum of the other four.

    Both database connections are closed even if a query fails.
    """

    def count_windows(conn, table, column):
        # One COUNT(*) per 10-minute window, newest window first.
        counts = []
        for upper in range(10, 110, 10):
            query = (
                u"SELECT COUNT(*) FROM %s WHERE %s>=NOW()-INTERVAL %d MINUTE"
                u" AND %s< NOW() - INTERVAL %d MINUTE"
                % (table, column, upper, column, upper - 10))
            counts.append(int(conn.selDB(query)[0][0]))
        return counts

    My = Mysql(host="10.6.2.121")
    My.conDB()
    try:
        # NOTE(review): credentials are hard-coded; consider moving to config.
        My1 = Mysql(host="10.6.2.79", user="******", passwd="123@asd")
        My1.conDB()
        try:
            ssnews = count_windows(My, "scrapy.news", "INSERT_TIME")
            ssbbs = count_windows(My, "scrapy.bbs", "INSERT_TIME")
            sssina = count_windows(My, "weibo.sina", "INSERT_TIME")
            ssweixin = count_windows(My1, "WEBSITE_WEIXIN.WEIXIN", "STORE_TIME")
        finally:
            My1.closeDB()
    finally:
        My.closeDB()

    sszl = [
        news + bbs + sina + weixin
        for news, bbs, sina, weixin in zip(ssnews, ssbbs, sssina, ssweixin)
    ]
    return [ssnews, ssbbs, sszl, sssina, ssweixin]
def weixinsearch(): urls = [] driver = webdriver.PhantomJS(desired_capabilities=dcap) # driver = webdriver.Firefox() url = urlall() print url driver.get(url) driver.implicitly_wait(5) try: driver.find_element_by_xpath("//div[@id='wxmore']/a").click() except: pass jxs = driver.find_element_by_xpath("//h3[@id='weixinname']").text urlse = driver.find_elements_by_xpath("//h4/a") for a in urlse: a1 = a.get_attribute("href") a1 = str(a1) urls.append(a1) cur = Mysql(host="10.6.2.121") cur.conDB() for uri in urls: driver.get(uri) time.sleep(random.randint(3, 5)) title = driver.find_element_by_xpath("//h2[@id='activity-name']").text date = driver.find_element_by_xpath("//em[@id='post-date']").text conten = driver.find_element_by_xpath("//div[@id='js_content']").text urlconten = driver.current_url print title, "经销商:", jxs, date, urlconten cur.selDB( "INSERT INTO weibo.WEIXIN(SCREEN_NAME,DATE_ISSUE,EVENT_TOPIC,EVENT_DESCR,EVENT_URL,SEARCHED_URL) VALUES ('%s','%s','%s','%s','%s','%s')" % (jxs, date, title, conten, urlconten, url) ) cur.closeDB() driver.quit()
def weibo(): urls =readtxt() My =Mysql(host="10.6.2.121") My.conDB() for uri in urls: driver =webdriver.Firefox() driver.get("http://weibo.com/u/%s"% uri) time.sleep(5) try: url = re.sub("http:\/\/weibo\.com\/","",str(driver.current_url)) url = re.sub("u\/","",str(url)) uri = re.sub("\\n","",uri) username =driver.find_element_by_xpath("//span[@class='username']").text gzs =driver.find_element_by_xpath("(//strong[@class='W_f18'])[1]|(//strong[@class='W_f14'])[1]|(//strong[@class='W_f12'])[1]").text fs =driver.find_element_by_xpath("(//strong[@class='W_f18'])[2]|(//strong[@class='W_f14'])[2]|(//strong[@class='W_f12'])[2]").text wb =driver.find_element_by_xpath("(//strong[@class='W_f18'])[3]|(//strong[@class='W_f14'])[3]|(//strong[@class='W_f12'])[3]").text print uri,gzs,fs,wb,url,username My.selDB("INSERT INTO echarts.uid(uid,gzs,fs,wb,url,username) VALUES ('%s','%s','%s','%s','%s','%s')"%(uri,gzs,fs,wb,url,username)) # data =[] # data.append(gzs) # data.append(fs) # data.append(wb) driver.close() # return data except: driver.close() # return "" My.closeDB()
def baidushipin():
    """Search Baidu Video for each hard-coded keyword and store the hits.

    For every keyword the search response is fetched, decoded from GBK and
    re-encoded as UTF-8, and the ``ti:``, ``url:`` and ``srcShortUrlExt:``
    fields are pulled out with regular expressions. Each hit with a non-empty
    source is inserted into ``scrapy.news``.

    One database connection is shared by all keywords and is closed even if a
    request or an insert fails. Hits are paired positionally; if the three
    field lists differ in length the unmatched tail is dropped.
    """
    keywords = ["西安", "咸阳", "渭南", "延安", "宝鸡", "榆林", "铜川", "汉中", "安康", "商洛"]
    ISOTIMEFORMAT = "%Y-%m-%d %X"
    groupname = "国内视频"

    cur = Mysql(host="10.6.2.121")
    cur.conDB()
    try:
        for keyword in keywords:
            print(keyword)
            req = urllib2.Request(
                "http://v.baidu.com/v?word=%s&rn=40&ct=905969664&ie=utf-8&du=0&pd=1&sc=0&pn=0&order=0&db=0&_=1433385366675"
                % keyword)
            data = urllib2.urlopen(req).read().decode("gbk", errors='ignore')
            data = data.encode("utf8")

            titles = re.findall(r"ti:.*", data)
            urls = re.findall(r'url: "/link\?.*', data)
            ly = re.findall(r"srcShortUrlExt:.*", data)
            date = time.strftime(ISOTIMEFORMAT, time.localtime())

            for title, link, source in zip(titles, urls, ly):
                # Strip the field prefix and the trailing '",' from each value.
                link = re.sub(r'url: "', "http://v.baidu.com", link)
                link = re.sub(r'\.*",', "", link)
                source = re.sub(r'srcShortUrlExt: "', "", source)
                source = re.sub(r'",', "", source)
                title = re.sub(r'ti:"', "", title)
                title = re.sub(r'",', "", title)
                print("%s %s %s %s %s" % (date, source, title, link, groupname))
                if source == "":
                    # No source site: skip the hit.
                    print("跳过")
                    continue
                # NOTE(review): scraped text is interpolated into the SQL; use
                # parameter binding if Mysql.selDB supports it.
                cur.selDB(
                    "INSERT INTO scrapy.news(IR_URLNAME,IR_URLTITLE,IR_CONTENT,IR_SRCNAME,IR_GROUPNAME,IR_SITENAME,IR_URLTIME,IR_CATALOG2) VALUES ('%s','%s','%s','%s','%s','%s','%s','python')"
                    % (link, title, title, source, groupname, source, date))
    finally:
        cur.closeDB()
def scrapysinaweibo(): mids = [] zfses = [] driver = webdriver.Firefox() #两种方式一种随机方法readrandom(),一种数据库去为爬过词方法readrandom() word = wordsall() word = str(word).replace('"', "").replace("+", "") words = word print word word = urllib2.quote(word).replace("%0A", "") driver.get("http://s.weibo.com/weibo/%s" % word) driver.implicitly_wait(5) # time.sleep(random.randint(5,10)) #获取所需字段 midsall = driver.find_elements_by_xpath( "//div[@class='WB_cardwrap S_bg2 clearfix']/div") uids = driver.find_elements_by_xpath( "//ul[@class='feed_action_info feed_action_row4']/li[2]/a") contens = driver.find_elements_by_xpath( "//div[@class='feed_content wbcon']/p[@class='comment_txt']") times = driver.find_elements_by_xpath( "//div[@class='content clearfix']/div[@class='feed_from W_textb']/a[@class='W_textb']" ) zfsesall = driver.find_elements_by_xpath( "//ul[@class='feed_action_info feed_action_row4']/li[2]//em") usernames = driver.find_elements_by_xpath( "//div[@class='feed_content wbcon']/a[@class='W_texta W_fb']") userimgs = driver.find_elements_by_xpath("//div[@class='face']/a/img") #评论数有问题 # plses = driver.find_elements_by_xpath("//ul[@class='feed_action_info feed_action_row4']/li[3]//em") # print "pls:",len(plses) #特殊数据处理 for mid in midsall: if mid.get_attribute("mid") == None: continue mids.append(mid) for zfss in zfsesall: zfs = zfss.text if zfs == None: zfs = "0" elif zfs == "": zfs = "0" zfses.append(zfs) cur = Mysql(host="10.6.2.121") cur.conDB() for i in range(len(uids)): uid = uids[i].get_attribute("action-data") uid = re.search(r"(?<=uid=)\d+", uid).group() mid = mids[i].get_attribute("mid") conten = contens[i].text date = times[i].get_attribute("title") zfs = zfses[i] username = usernames[i].get_attribute("title") userimg = userimgs[i].get_attribute("src") group = "scrapy微博" sitename = "新浪微博" print username, conten, uid, mid, date, zfs, words, userimg # cur.selDB("INSERT INTO 
weibo.sina(IR_UID,IR_MID,IR_CREATED_AT,IR_STATUS_CONTENT,IR_RTTCOUNT,IR_GROUPNAME,IR_SITENAME,KEYWORD,IR_SCREEN_NAME,IR_PROFILE_IMAGE_URL) VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')"%(uid,mid,date,conten,zfs,group,sitename,words,username,userimg)) driver.quit() cur.closeDB()
def mysql():
    """Return the cached day/week/month/year insert totals as a JSON response.

    Reads sixteen ``SQLNUM`` values from ``echarts.user`` (news, bbs, sina and
    overall total for each of the four periods) and returns
    ``jsonify({...})`` with the keys ``allrkl`` (current day), ``allweek``
    (last week), ``allmonth`` (last month) and ``allyear`` (current year).
    Each value is a string of the form ``"[news,bbs,sina,total]"``.

    The connection is closed even if a query fails.
    """
    # Response key -> TYPE prefix of the rows backing it.
    periods = (
        ("allrkl", "ALLDAY"),
        ("allweek", "ALLWEEK"),
        ("allmonth", "ALLMONTH"),
        ("allyear", "ALLYEAR"),
    )
    # The empty suffix selects the overall total row (e.g. TYPE='ALLDAY').
    suffixes = ("NEWS", "BBS", "SINA", "")

    conn = Mysql(host="10.6.2.121")
    conn.conDB()
    try:
        data = {}
        for key, prefix in periods:
            values = [
                conn.selDB(
                    u"SELECT SQLNUM FROM echarts.user WHERE TYPE='%s'"
                    % (prefix + suffix))[0][0]
                for suffix in suffixes
            ]
            data[key] = "[%s,%s,%s,%s]" % tuple(values)
    finally:
        conn.closeDB()
    return jsonify(data)
def mysqldaynews():
    """Refresh the cached count of ``scrapy.news`` rows inserted today.

    Counts the rows whose ``INSERT_TIME`` date equals the current date,
    prints the count and writes it to the ``ALLDAYNEWS`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        number = m.selDB(
            "SELECT COUNT(*) FROM scrapy.news WHERE "
            "DATE_SUB(CURDATE(), INTERVAL 0 DAY) = date(INSERT_TIME); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLDAYNEWS'"
                % number)
    finally:
        m.closeDB()
def mysqlmonthsina():
    """Refresh the cached count of ``weibo.sina`` rows inserted last month.

    Counts the rows whose ``INSERT_TIME`` falls in the previous calendar
    month, prints the count and writes it to the ``ALLMONTHSINA`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        number = m.selDB(
            "SELECT COUNT(*) FROM weibo.sina WHERE "
            "DATE_FORMAT(INSERT_TIME,'%Y-%m')="
            "DATE_FORMAT(DATE_SUB(curdate(), INTERVAL 1 MONTH),'%Y-%m'); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLMONTHSINA'"
                % number)
    finally:
        m.closeDB()
def mysqlweeksina():
    """Refresh the cached count of ``weibo.sina`` rows inserted last week.

    Counts the rows whose ``INSERT_TIME`` falls in the week before the
    current one, prints the count and writes it to the ``ALLWEEKSINA`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        # YEARWEEK() never yields week 00, so the former `YEARWEEK(now())-1`
        # matched nothing during the first week of a year. Take last week's
        # key from a date one week back instead.
        number = m.selDB(
            "SELECT COUNT(*) FROM weibo.sina WHERE "
            "YEARWEEK(date_format(INSERT_TIME,'%Y-%m-%d')) = "
            "YEARWEEK(DATE_SUB(now(), INTERVAL 1 WEEK)); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLWEEKSINA'"
                % number)
    finally:
        m.closeDB()
def mysqldaysina():
    """Refresh the cached count of ``weibo.sina`` rows inserted today.

    Counts the rows whose ``INSERT_TIME`` date equals the current date,
    prints the count and writes it to the ``ALLDAYSINA`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        number = m.selDB(
            "SELECT COUNT(*) FROM weibo.sina WHERE "
            "DATE_SUB(CURDATE(), INTERVAL 0 DAY) = date(INSERT_TIME); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLDAYSINA'"
                % number)
    finally:
        m.closeDB()
def mysqlyearbbs():
    """Refresh the cached count of ``scrapy.bbs`` rows inserted this year.

    Counts the rows whose ``INSERT_TIME`` year equals the current year,
    prints the count and writes it to the ``ALLYEARBBS`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        number = m.selDB(
            "SELECT COUNT(*) FROM scrapy.bbs WHERE "
            "YEAR(date_format(INSERT_TIME,'%Y-%m-%d')) = YEAR(now()); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLYEARBBS'"
                % number)
    finally:
        m.closeDB()
def mysqlmonthnews():
    """Refresh the cached count of ``scrapy.news`` rows inserted last month.

    Counts the rows whose ``INSERT_TIME`` falls in the previous calendar
    month, prints the count and writes it to the ``ALLMONTHNEWS`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        number = m.selDB(
            "SELECT COUNT(*) FROM scrapy.news WHERE "
            "DATE_FORMAT(INSERT_TIME,'%Y-%m')="
            "DATE_FORMAT(DATE_SUB(curdate(), INTERVAL 1 MONTH),'%Y-%m'); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLMONTHNEWS'"
                % number)
    finally:
        m.closeDB()
def mysqlweeknews():
    """Refresh the cached count of ``scrapy.news`` rows inserted last week.

    Counts the rows whose ``INSERT_TIME`` falls in the week before the
    current one, prints the count and writes it to the ``ALLWEEKNEWS`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        # YEARWEEK() never yields week 00, so the former `YEARWEEK(now())-1`
        # matched nothing during the first week of a year. Take last week's
        # key from a date one week back instead.
        number = m.selDB(
            "SELECT COUNT(*) FROM scrapy.news WHERE "
            "YEARWEEK(date_format(INSERT_TIME,'%Y-%m-%d')) = "
            "YEARWEEK(DATE_SUB(now(), INTERVAL 1 WEEK)); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLWEEKNEWS'"
                % number)
    finally:
        m.closeDB()
def scrapysinaweibo(): mids =[] zfses=[] driver =webdriver.Firefox() #两种方式一种随机方法readrandom(),一种数据库去为爬过词方法readrandom() word = wordsall() word = str(word).replace('"',"").replace("+","") words =word print word word = urllib2.quote(word).replace("%0A","") driver.get("http://s.weibo.com/weibo/%s"%word) driver.implicitly_wait(5) # time.sleep(random.randint(5,10)) #获取所需字段 midsall = driver.find_elements_by_xpath("//div[@class='WB_cardwrap S_bg2 clearfix']/div") uids = driver.find_elements_by_xpath("//ul[@class='feed_action_info feed_action_row4']/li[2]/a") contens = driver.find_elements_by_xpath("//div[@class='feed_content wbcon']/p[@class='comment_txt']") times = driver.find_elements_by_xpath("//div[@class='content clearfix']/div[@class='feed_from W_textb']/a[@class='W_textb']") zfsesall = driver.find_elements_by_xpath("//ul[@class='feed_action_info feed_action_row4']/li[2]//em") usernames = driver.find_elements_by_xpath("//div[@class='feed_content wbcon']/a[@class='W_texta W_fb']") userimgs = driver.find_elements_by_xpath("//div[@class='face']/a/img") #评论数有问题 # plses = driver.find_elements_by_xpath("//ul[@class='feed_action_info feed_action_row4']/li[3]//em") # print "pls:",len(plses) #特殊数据处理 for mid in midsall: if mid.get_attribute("mid") ==None: continue mids.append(mid) for zfss in zfsesall: zfs= zfss.text if zfs ==None: zfs ="0" elif zfs =="": zfs ="0" zfses.append(zfs) cur =Mysql(host="10.6.2.121") cur.conDB() for i in range(len(uids)): uid= uids[i].get_attribute("action-data") uid =re.search(r"(?<=uid=)\d+",uid).group() mid = mids[i].get_attribute("mid") conten = contens[i].text date = times[i].get_attribute("title") zfs = zfses[i] username =usernames[i].get_attribute("title") userimg = userimgs[i].get_attribute("src") group = "scrapy微博" sitename = "新浪微博" print username,conten,uid,mid,date,zfs,words,userimg # cur.selDB("INSERT INTO 
weibo.sina(IR_UID,IR_MID,IR_CREATED_AT,IR_STATUS_CONTENT,IR_RTTCOUNT,IR_GROUPNAME,IR_SITENAME,KEYWORD,IR_SCREEN_NAME,IR_PROFILE_IMAGE_URL) VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')"%(uid,mid,date,conten,zfs,group,sitename,words,username,userimg)) driver.quit() cur.closeDB()
def mysqlyearnews():
    """Refresh the cached count of ``scrapy.news`` rows inserted this year.

    Counts the rows whose ``INSERT_TIME`` year equals the current year,
    prints the count and writes it to the ``ALLYEARNEWS`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        number = m.selDB(
            "SELECT COUNT(*) FROM scrapy.news WHERE "
            "YEAR(date_format(INSERT_TIME,'%Y-%m-%d')) = YEAR(now()); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLYEARNEWS'"
                % number)
    finally:
        m.closeDB()
def mysqlweeksina():
    """Refresh the cached count of ``weibo.sina`` rows inserted last week.

    Counts the rows whose ``INSERT_TIME`` falls in the week before the
    current one, prints the count and writes it to the ``ALLWEEKSINA`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        # YEARWEEK() never yields week 00, so the former `YEARWEEK(now())-1`
        # matched nothing during the first week of a year. Take last week's
        # key from a date one week back instead.
        number = m.selDB(
            "SELECT COUNT(*) FROM weibo.sina WHERE "
            "YEARWEEK(date_format(INSERT_TIME,'%Y-%m-%d')) = "
            "YEARWEEK(DATE_SUB(now(), INTERVAL 1 WEEK)); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLWEEKSINA'"
                % number)
    finally:
        m.closeDB()
def mysqlmonthsina():
    """Refresh the cached count of ``weibo.sina`` rows inserted last month.

    Counts the rows whose ``INSERT_TIME`` falls in the previous calendar
    month, prints the count and writes it to the ``ALLMONTHSINA`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        number = m.selDB(
            "SELECT COUNT(*) FROM weibo.sina WHERE "
            "DATE_FORMAT(INSERT_TIME,'%Y-%m')="
            "DATE_FORMAT(DATE_SUB(curdate(), INTERVAL 1 MONTH),'%Y-%m'); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLMONTHSINA'"
                % number)
    finally:
        m.closeDB()
def mysqlweekbbs():
    """Refresh the cached count of ``scrapy.bbs`` rows inserted last week.

    Counts the rows whose ``INSERT_TIME`` falls in the week before the
    current one, prints the count and writes it to the ``ALLWEEKBBS`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        # YEARWEEK() never yields week 00, so the former `YEARWEEK(now())-1`
        # matched nothing during the first week of a year. Take last week's
        # key from a date one week back instead.
        number = m.selDB(
            "SELECT COUNT(*) FROM scrapy.bbs WHERE "
            "YEARWEEK(date_format(INSERT_TIME,'%Y-%m-%d')) = "
            "YEARWEEK(DATE_SUB(now(), INTERVAL 1 WEEK)); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLWEEKBBS'"
                % number)
    finally:
        m.closeDB()
def mysqldaysina():
    """Refresh the cached count of ``weibo.sina`` rows inserted today.

    Counts the rows whose ``INSERT_TIME`` date equals the current date,
    prints the count and writes it to the ``ALLDAYSINA`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        number = m.selDB(
            "SELECT COUNT(*) FROM weibo.sina WHERE "
            "DATE_SUB(CURDATE(), INTERVAL 0 DAY) = date(INSERT_TIME); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLDAYSINA'"
                % number)
    finally:
        m.closeDB()
def mysqlmonthnews():
    """Refresh the cached count of ``scrapy.news`` rows inserted last month.

    Counts the rows whose ``INSERT_TIME`` falls in the previous calendar
    month, prints the count and writes it to the ``ALLMONTHNEWS`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        number = m.selDB(
            "SELECT COUNT(*) FROM scrapy.news WHERE "
            "DATE_FORMAT(INSERT_TIME,'%Y-%m')="
            "DATE_FORMAT(DATE_SUB(curdate(), INTERVAL 1 MONTH),'%Y-%m'); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLMONTHNEWS'"
                % number)
    finally:
        m.closeDB()
def mysqldaybbs():
    """Refresh the cached count of ``scrapy.bbs`` rows inserted today.

    Counts the rows whose ``INSERT_TIME`` date equals the current date,
    prints the count and writes it to the ``ALLDAYBBS`` row of
    ``echarts.user``. The connection is closed even if a query fails.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        number = m.selDB(
            "SELECT COUNT(*) FROM scrapy.bbs WHERE "
            "DATE_SUB(CURDATE(), INTERVAL 0 DAY) = date(INSERT_TIME); "
        )[0][0]
        print(number)
        # UPDATE only -- a matching row must already exist in echarts.user
        # (the INSERT variant is disabled).
        m.selDB("UPDATE echarts.user SET SQLNUM='%s' WHERE TYPE='ALLDAYBBS'"
                % number)
    finally:
        m.closeDB()
def mysql():
    """Return the cached day/week/month/year insert totals as a JSON response.

    Reads sixteen ``SQLNUM`` values from ``echarts.user`` (news, bbs, sina and
    overall total for each of the four periods) and returns
    ``jsonify({...})`` with the keys ``allrkl`` (current day), ``allweek``
    (last week), ``allmonth`` (last month) and ``allyear`` (current year).
    Each value is a string of the form ``"[news,bbs,sina,total]"``.

    The connection is closed even if a query fails.
    """
    # Response key -> TYPE prefix of the rows backing it.
    periods = (
        ("allrkl", "ALLDAY"),
        ("allweek", "ALLWEEK"),
        ("allmonth", "ALLMONTH"),
        ("allyear", "ALLYEAR"),
    )
    # The empty suffix selects the overall total row (e.g. TYPE='ALLDAY').
    suffixes = ("NEWS", "BBS", "SINA", "")

    conn = Mysql(host="10.6.2.121")
    conn.conDB()
    try:
        data = {}
        for key, prefix in periods:
            values = [
                conn.selDB(
                    u"SELECT SQLNUM FROM echarts.user WHERE TYPE='%s'"
                    % (prefix + suffix))[0][0]
                for suffix in suffixes
            ]
            data[key] = "[%s,%s,%s,%s]" % tuple(values)
    finally:
        conn.closeDB()
    return jsonify(data)
def day30():
    """Return the stored per-day counts for the last 30 days as a JSON response.

    For each of the last 30 days (today included) the ``TIMEDAYNEWS``,
    ``TIMEDAYBBS``, ``TIMEDAYSINA`` and ``TIMEDAY`` values are read from
    ``echarts.user`` together with the date itself. The response carries the
    keys ``news30``, ``bbs30``, ``sina30``, ``all30`` and ``day30``; each
    value is the ``str()`` of a list ordered oldest day first.

    The connection is closed even if a query fails. The first row of every
    result is indexed directly, so a day with no stored row makes the lookup
    fail (presumably IndexError -- depends on what Mysql.selDB returns).
    """
    news30, bbs30, sina30, all30, days = [], [], [], [], []
    # TYPE value in echarts.user -> list collecting that series.
    series = (
        ("TIMEDAYNEWS", news30),
        ("TIMEDAYBBS", bbs30),
        ("TIMEDAYSINA", sina30),
        ("TIMEDAY", all30),
    )

    conn = Mysql(host="10.6.2.121")
    conn.conDB()
    try:
        for i in range(30):
            for row_type, bucket in series:
                value = conn.selDB(
                    u"SELECT SQLNUM FROM echarts.user WHERE TYPE='%s' AND date(DAY_TIME)=DATE_SUB(CURDATE(), INTERVAL %d DAY)"
                    % (row_type, i))[0][0]
                bucket.append(int(value))
            daytime = str(
                conn.selDB("SELECT DATE_SUB(CURDATE(), INTERVAL %s DAY)" % i)[0][0])
            days.append(daytime)
            print(daytime)
    finally:
        conn.closeDB()

    print(news30)
    # Rows were collected newest first; charts expect oldest first.
    for bucket in (news30, bbs30, sina30, all30, days):
        bucket.reverse()
    print(days)

    data = {
        "news30": str(news30),
        "bbs30": str(bbs30),
        "sina30": str(sina30),
        "all30": str(all30),
        "day30": str(days),
    }
    return jsonify(data)
def selmysql():
    """Return per-10-minute insert counts for the last 100 minutes.

    Returns:
        A list of five lists of ten integers each, ordered from the most
        recent window (0-10 minutes ago) to the oldest (90-100 minutes ago):
        ``[news, bbs, total, sina, weixin]``, where ``total`` is the
        element-wise sum of the other four.

    Both database connections are closed even if a query fails.
    """

    def count_windows(conn, table, column):
        # One COUNT(*) per 10-minute window, newest window first.
        counts = []
        for upper in range(10, 110, 10):
            query = (
                u"SELECT COUNT(*) FROM %s WHERE %s>=NOW()-INTERVAL %d MINUTE"
                u" AND %s< NOW() - INTERVAL %d MINUTE"
                % (table, column, upper, column, upper - 10))
            counts.append(int(conn.selDB(query)[0][0]))
        return counts

    My = Mysql(host="10.6.2.121")
    My.conDB()
    try:
        # NOTE(review): credentials are hard-coded; consider moving to config.
        My1 = Mysql(host="10.6.2.79", user="******", passwd="123@asd")
        My1.conDB()
        try:
            ssnews = count_windows(My, "scrapy.news", "INSERT_TIME")
            ssbbs = count_windows(My, "scrapy.bbs", "INSERT_TIME")
            sssina = count_windows(My, "weibo.sina", "INSERT_TIME")
            ssweixin = count_windows(My1, "WEBSITE_WEIXIN.WEIXIN", "STORE_TIME")
        finally:
            My1.closeDB()
    finally:
        My.closeDB()

    sszl = [
        news + bbs + sina + weixin
        for news, bbs, sina, weixin in zip(ssnews, ssbbs, sssina, ssweixin)
    ]
    return [ssnews, ssbbs, sszl, sssina, ssweixin]
def data30():
    """Rebuild the 30-day per-day count rows in ``echarts.user``.

    Deletes every row whose ``TYPE`` contains ``TIME``, then for each of the
    last 30 days (today included) counts the rows inserted that day in
    ``scrapy.news``, ``scrapy.bbs`` and ``weibo.sina`` and inserts four rows:
    ``TIMEDAYNEWS``, ``TIMEDAYBBS``, ``TIMEDAYSINA`` and ``TIMEDAY`` (the sum
    of the three). Each day's figures are also printed.

    The connection is closed even if a query fails.
    NOTE(review): the delete and the inserts are separate statements; a
    failure part-way leaves the table partially rebuilt.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        m.selDB("DELETE FROM echarts.user WHERE TYPE LIKE '%TIME%'")
        for i in range(30):
            timedata = m.selDB(
                "SELECT DATE_SUB(CURDATE(), INTERVAL %s DAY)" % i)[0][0]
            daynews = m.selDB(
                "SELECT COUNT(*) FROM scrapy.news where date(INSERT_TIME)=DATE_SUB(CURDATE(), INTERVAL %s DAY)"
                % i)[0][0]
            daybbs = m.selDB(
                "SELECT COUNT(*) FROM scrapy.bbs where date(INSERT_TIME)=DATE_SUB(CURDATE(), INTERVAL %s DAY)"
                % i)[0][0]
            daysina = m.selDB(
                "SELECT COUNT(*) FROM weibo.sina where date(INSERT_TIME)=DATE_SUB(CURDATE(), INTERVAL %s DAY)"
                % i)[0][0]
            day = daynews + daybbs + daysina

            # Insert this day's figures. An earlier variant updated existing
            # rows in place (UPDATE ... LIMIT 1) instead; it is not used.
            for row_type, count in (("TIMEDAYNEWS", daynews),
                                    ("TIMEDAYBBS", daybbs),
                                    ("TIMEDAYSINA", daysina),
                                    ("TIMEDAY", day)):
                m.selDB(
                    "INSERT INTO echarts.user(TYPE,SQLNUM,DAY_TIME) VALUES ('%s','%s','%s')"
                    % (row_type, count, timedata))
            print("%s %s %s %s %s" % (timedata, daynews, daybbs, daysina, day))
    finally:
        m.closeDB()
def day30():
    """Return the stored per-day counts for the last 30 days as a JSON response.

    For each of the last 30 days (today included) the ``TIMEDAYNEWS``,
    ``TIMEDAYBBS``, ``TIMEDAYSINA`` and ``TIMEDAY`` values are read from
    ``echarts.user`` together with the date itself. The response carries the
    keys ``news30``, ``bbs30``, ``sina30``, ``all30`` and ``day30``; each
    value is the ``str()`` of a list ordered oldest day first.

    The connection is closed even if a query fails. The first row of every
    result is indexed directly, so a day with no stored row makes the lookup
    fail (presumably IndexError -- depends on what Mysql.selDB returns).
    """
    news30, bbs30, sina30, all30, days = [], [], [], [], []
    # TYPE value in echarts.user -> list collecting that series.
    series = (
        ("TIMEDAYNEWS", news30),
        ("TIMEDAYBBS", bbs30),
        ("TIMEDAYSINA", sina30),
        ("TIMEDAY", all30),
    )

    conn = Mysql(host="10.6.2.121")
    conn.conDB()
    try:
        for i in range(30):
            for row_type, bucket in series:
                value = conn.selDB(
                    u"SELECT SQLNUM FROM echarts.user WHERE TYPE='%s' AND date(DAY_TIME)=DATE_SUB(CURDATE(), INTERVAL %d DAY)"
                    % (row_type, i))[0][0]
                bucket.append(int(value))
            daytime = str(
                conn.selDB("SELECT DATE_SUB(CURDATE(), INTERVAL %s DAY)" % i)[0][0])
            days.append(daytime)
            print(daytime)
    finally:
        conn.closeDB()

    print(news30)
    # Rows were collected newest first; charts expect oldest first.
    for bucket in (news30, bbs30, sina30, all30, days):
        bucket.reverse()
    print(days)

    data = {
        "news30": str(news30),
        "bbs30": str(bbs30),
        "sina30": str(sina30),
        "all30": str(all30),
        "day30": str(days),
    }
    return jsonify(data)
def data30():
    """Rebuild the 30-day per-day count rows in ``echarts.user``.

    Deletes every row whose ``TYPE`` contains ``TIME``, then for each of the
    last 30 days (today included) counts the rows inserted that day in
    ``scrapy.news``, ``scrapy.bbs`` and ``weibo.sina`` and inserts four rows:
    ``TIMEDAYNEWS``, ``TIMEDAYBBS``, ``TIMEDAYSINA`` and ``TIMEDAY`` (the sum
    of the three). Each day's figures are also printed.

    The connection is closed even if a query fails.
    NOTE(review): the delete and the inserts are separate statements; a
    failure part-way leaves the table partially rebuilt.
    """
    m = Mysql(host="10.6.2.121")
    m.conDB()
    try:
        m.selDB("DELETE FROM echarts.user WHERE TYPE LIKE '%TIME%'")
        for i in range(30):
            timedata = m.selDB(
                "SELECT DATE_SUB(CURDATE(), INTERVAL %s DAY)" % i)[0][0]
            daynews = m.selDB(
                "SELECT COUNT(*) FROM scrapy.news where date(INSERT_TIME)=DATE_SUB(CURDATE(), INTERVAL %s DAY)"
                % i)[0][0]
            daybbs = m.selDB(
                "SELECT COUNT(*) FROM scrapy.bbs where date(INSERT_TIME)=DATE_SUB(CURDATE(), INTERVAL %s DAY)"
                % i)[0][0]
            daysina = m.selDB(
                "SELECT COUNT(*) FROM weibo.sina where date(INSERT_TIME)=DATE_SUB(CURDATE(), INTERVAL %s DAY)"
                % i)[0][0]
            day = daynews + daybbs + daysina

            # Insert this day's figures. An earlier variant updated existing
            # rows in place (UPDATE ... LIMIT 1) instead; it is not used.
            for row_type, count in (("TIMEDAYNEWS", daynews),
                                    ("TIMEDAYBBS", daybbs),
                                    ("TIMEDAYSINA", daysina),
                                    ("TIMEDAY", day)):
                m.selDB(
                    "INSERT INTO echarts.user(TYPE,SQLNUM,DAY_TIME) VALUES ('%s','%s','%s')"
                    % (row_type, count, timedata))
            print("%s %s %s %s %s" % (timedata, daynews, daybbs, daysina, day))
    finally:
        m.closeDB()
def weibo(): urls = readtxt() My = Mysql(host="10.6.2.121") My.conDB() for uri in urls: driver = webdriver.Firefox() driver.get("http://weibo.com/u/%s" % uri) time.sleep(5) try: url = re.sub("http:\/\/weibo\.com\/", "", str(driver.current_url)) url = re.sub("u\/", "", str(url)) uri = re.sub("\\n", "", uri) username = driver.find_element_by_xpath( "//span[@class='username']").text gzs = driver.find_element_by_xpath( "(//strong[@class='W_f18'])[1]|(//strong[@class='W_f14'])[1]|(//strong[@class='W_f12'])[1]" ).text fs = driver.find_element_by_xpath( "(//strong[@class='W_f18'])[2]|(//strong[@class='W_f14'])[2]|(//strong[@class='W_f12'])[2]" ).text wb = driver.find_element_by_xpath( "(//strong[@class='W_f18'])[3]|(//strong[@class='W_f14'])[3]|(//strong[@class='W_f12'])[3]" ).text print uri, gzs, fs, wb, url, username My.selDB( "INSERT INTO echarts.uid(uid,gzs,fs,wb,url,username) VALUES ('%s','%s','%s','%s','%s','%s')" % (uri, gzs, fs, wb, url, username)) # data =[] # data.append(gzs) # data.append(fs) # data.append(wb) driver.close() # return data except: driver.close() # return "" My.closeDB()