def Gao():
    # Current time in "minutes", approximating every month as 30 days.
    minute = time.strftime('%Y/%m/%d/%H/%M/%S', time.localtime(time.time()))
    j = minute.split('/')
    now = int(j[0]) * 365 * 24 * 60 + int(j[1]) * 30 * 24 * 60 + int(j[2]) * 24 * 60 + int(j[3]) * 60 + int(j[4])
    user = []
    news = []
    for x in db.get_all('news'):
        news.append(x)
    for x in db.get_all('keyword'):
        if x.push_id == "null" or x.push_id == "":
            continue
        if x.push_id in user:
            continue
        # Last push record for this user, if any.
        p = 0
        push_time = 0
        last_id = 0
        for y in db.get_all('push_time'):
            if y.push_id == x.push_id:
                p = y.id
                push_time = y.time
                last_id = y.last_id
                break
        us = ""
        tit = ""
        ur = ""
        flag = 0
        # Scan newest news first; push at most once per hour per user,
        # and only items newer than the last pushed one.
        for y in news[::-1]:
            if y.word == x.word or y.title.find(x.word) != -1:
                if now - push_time >= 60 and y.id > last_id:
                    user.append(x.push_id)
                    us = x.push_id
                    tit = y.title + '$' + y.url
                    ur = y.url
                    last_id = y.id
                    flag = 1
                    break
        if flag == 0:
            continue
        # Skip sources this user has excluded.
        for y in db.get_all('URL'):
            if y.push_id == us and ur.find(y.url) != -1:
                flag = 0
                break
        if flag == 0:
            continue
        if push_time > 0:
            db.delete_push_time("id = " + str(p))
        db.insert_push_time(us, now, last_id)
        push_news(us, tit)
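# Note: Gao() measures "now" in minutes by treating every month as 30 days,
# so the one-hour throttle can misfire around month boundaries. A hypothetical
# drop-in alternative (not used above) is minutes since the Unix epoch:
def minutes_now():
    return int(time.time()) // 60  # seconds since the epoch, in whole minutes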
def hituc():
    # Fetch the page once and run both patterns over the same snapshot.
    page = urllib2.urlopen("http://hituc.hit.edu.cn").read()
    m = re.findall(r"""/news/Show\S* \s*target="_blank"\s*\S*\s*\S*\s*\S*\s*\S* </a>""", page, re.M | re.X)
    d = re.findall(r"""class="news">\d*-\d*-\d* </TD>""", page, re.M | re.X)
    T = time.strftime('%Y/%m/%d', time.localtime(time.time()))
    URL = []
    for x in db.get_all('News'):
        URL.append(x.url)
    for j, i in enumerate(m):
        date = d[j][13:-11].replace('-', '/')
        if date != T:
            continue
        l = i.split('target')
        url = "http://hituc.hit.edu.cn" + l[0][0:-2]
        if url in URL:
            continue
        URL.append(url)
        title = l[1][23:-4].decode('gbk').encode('utf8')  # page is GBK-encoded
        db.get_insert_news('News', str(url), title, date)
def usp():
    m = re.findall(r"""showXinWen_XX.asp\?MC=\S* \s*\S*\s*title\s*=\s*\S* \s*\S*\s*\S*\s*\S*\s*\S*\s*\S*\s*\S*\s*\S*\s*\S*\s*\S*\s*\S*\s*\S*\s*\S*\s*\S*\s*\S*\s*\S* </font>""",
                   urllib2.urlopen("http://www.usp.com.cn").read(), re.M | re.X)
    T = time.strftime('%Y/%m/%d', time.localtime(time.time()))
    URL = []
    for x in db.get_all('News'):
        URL.append(x.url)
    for x in m:
        i = x.decode('gbk').encode('utf8')  # page is GBK-encoded
        date = i[-17:-7].replace('-', '/')
        if date != T:
            continue
        j = i.split("' target='_blank' title = '")
        url = "http://www.usp.com.cn/" + j[0]
        if url in URL:
            continue
        URL.append(url)
        k = j[1].split("><font color=")
        title = k[0][0:-2]
        db.get_insert_news('News', str(url), title, date)
def today():
    # Fetch the feed once and run both patterns over the same snapshot.
    feed = urllib2.urlopen("http://today.hit.edu.cn/rss.xml").read()
    m = re.findall(r"""<title><\S*\s*\S*\s*\S* </title>""", feed, re.M | re.X)
    link = re.findall(r"""<link><\S*\s*\S*\s*\S* </link>""", feed, re.M | re.X)
    T = time.strftime('%Y/%m/%d', time.localtime(time.time()))
    URL = []
    for x in db.get_all('News'):
        URL.append(x.url)
    # enumerate keeps titles (m) and links in step even when items are
    # skipped; the original bumped j only for accepted items, which
    # desynchronized the two lists.
    for j, i in enumerate(link):
        l = i.split("[CDATA[")
        date = l[1][29:39].replace('-', '/')
        if date != T:
            continue
        r = l[1].split("]]>")
        url = r[0]
        if url in URL:
            continue
        URL.append(url)
        x = m[j].decode('gbk').encode('utf8')
        title = x[16:-11]
        db.get_insert_news('News', str(url), str(title), date)
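# The regex scraping above is brittle against feed-layout changes. A
# hypothetical alternative (not used by the code above) is to parse the
# RSS feed as XML and read the item fields directly:
def today_rss_items():
    from xml.etree import ElementTree
    feed = urllib2.urlopen("http://today.hit.edu.cn/rss.xml").read()
    root = ElementTree.fromstring(feed)
    # each <item> carries <title> and <link> elements
    return [(item.findtext('title'), item.findtext('link'))
            for item in root.iter('item')]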
def studyathit():
    page = urllib.urlopen("http://studyathit.hit.edu.cn").read()
    # '¡¤' below is the GBK-encoded bullet '·' as it appears in the raw page bytes.
    m = re.findall(r"""detail.asp\?id= \d*"\s* class="newstext"\s>¡¤ \S* </a>""", page, re.M | re.X)
    d = re.findall(r""" \d\d\d\d-\d\d-\d\d </td>""", page, re.M | re.X)
    T = time.strftime('%Y/%m/%d', time.localtime(time.time()))
    URL = []
    for x in db.get_all('News'):
        URL.append(x.url)
    for j, i in enumerate(m):
        date = d[j][-15:-5].replace('-', '/')
        if date != T:
            continue
        url = "http://studyathit.hit.edu.cn/" + i[0:18]
        if url in URL:
            continue
        URL.append(url)
        l = i.split(" >")
        title = l[1][2:-4]
        db.get_insert_news('News', str(url), title, date)
def acm():
    page = urllib.urlopen("http://acm.hit.edu.cn").read()
    m = re.findall(r"""<h3><a\shref="http://acm.hit.edu.cn/article/ \d* \S* </a>""", page, re.M | re.X)
    d = re.findall(r"""<span\sclass="time">\d\d\d\d-\d\d-\d\d </span>""", page, re.M | re.X)
    URL = []
    for x in db.get_all('News'):
        URL.append(x.url)
    # Note: unlike the other scrapers, no same-day filter is applied here;
    # every listed ACM article is considered.
    for j, i in enumerate(m):
        date = d[j][-17:-7].replace('-', '/')
        url = i[13:47]
        if url in URL:
            continue
        URL.append(url)
        title = i[49:-4]
        db.get_insert_news('News', str(url), str(title), date)
def init():
    # Collect the distinct subscribed keywords and hand them to GO().
    Key = []
    for key in db.get_all('keyword'):
        if key.word not in Key:
            Key.append(key.word)
    return GO(Key)
def zsb():
    m = re.findall(r"""news/user/News_Detail_use.asp \S* height=800,
                       \s \S*   #state
                       \s \S*   #menubar
                       \s \S*   #resizable
                       \s \S*   #scrollbars
                       \s \S*   #false
                       \s \S*   #target
                       \s \S*   #title
                       \s\S*\s* </a> \s*\S*
                       """, urllib2.urlopen("http://zsb.hit.edu.cn").read(), re.M | re.X)
    T = time.strftime('%m.%d', time.localtime(time.time()))
    URL = []
    for x in db.get_all('News'):
        URL.append(x.url)
    for i in m:
        dat = i[-5:-1]
        if dat != T:
            continue
        d = T.replace('.', '/')
        date = "2014/"  # the listing shows no year, so it is hardcoded
        if d[1] == '/':  # zero-pad a single-digit month
            date += "0"
        date += d
        url = "http://zsb.hit.edu.cn/" + i[0:37]
        if url in URL:
            continue
        URL.append(url)
        l = i.split(">")
        title = l[1][0:-3].decode('gbk').encode('utf8')  # page is GBK-encoded
        db.get_insert_news('News', str(url), title, date)
def GET(self):
    m = re.findall(r"""'/news/ +   # prefix
                       \w*         # year, e.g. 2014
                       /
                       \w\w        # month
                       -
                       \w*         # day
                       /
                       \w*         # article id
                       .htm' \s
                       title=' \S* ' \s""",
                   urllib2.urlopen("http://today.hit.edu.cn").read(), re.M | re.X)
    T = time.strftime('%Y/%m-%d', time.localtime(time.time()))
    # Rebuild today's front-page list from scratch.
    for x in db.get_all('News'):
        db.get_delete('News', "id=" + str(x.id))
    URL = []
    for i in m:
        y = i.decode('gbk').encode('utf8')  # page is GBK-encoded
        x = y.split("title=")
        url = "http://today.hit.edu.cn" + x[0][1:-2]
        title = x[1][1:-2]
        date = i[7:17]
        if date != T:
            continue
        if url in URL:
            continue
        URL.append(url)
        db.get_insert_news('News', str(url), str(title))
def news():
    m = re.findall(r""""/articles/  # prefix
                       \w*          # year, e.g. 2014
                       /
                       \w\w         # month
                       -
                       \w*          # day
                       /
                       \w*          # article id
                       .htm" \s*
                       target="_blank"> \S*\s*\S*\s*\S* </a>""",
                   urllib.urlopen("http://news.hit.edu.cn").read(), re.M | re.X)
    T = time.strftime('%Y/%m/%d', time.localtime(time.time()))
    URL = []
    for x in db.get_all('News'):
        URL.append(x.url)
    for x in m:
        i = x.decode('gbk').encode('utf8')  # page is GBK-encoded
        date = i[11:21].replace('-', '/')
        if date != T:
            continue
        j = i.split(' target="_blank">')
        url = "http://news.hit.edu.cn" + j[0][1:-1]
        if url in URL:
            continue
        URL.append(url)
        title = j[1][0:-4]
        db.get_insert_news('News', str(url), str(title), date)
def jwc():
    # Fetch the page once and run both patterns over the same snapshot.
    page = urllib.urlopen("http://jwc.hit.edu.cn").read()
    m = re.findall(r"""<a\shref="/news/Showgg.asp\?id= \d\d\d\d"
                       \s \S*   #target
                       \s \S* </a>""", page, re.M | re.X)
    mm = re.findall(r"""class="news">\d\d\d\d-\d*-\d*   #date
                     """, page, re.M | re.X)
    T = time.strftime('%Y/%m/%d', time.localtime(time.time()))
    URL = []
    for x in db.get_all('News'):
        URL.append(x.url)
    for j, i in enumerate(m):
        date = mm[j][13:-6].replace('-', '/')
        if date != T:
            continue
        i = i.split(' target="_blank" class="news">')
        url = "http://jwc.hit.edu.cn" + i[0][9:-1]
        if url in URL:
            continue
        URL.append(url)
        title = i[1][0:-4].decode('gbk').encode('utf8')  # page is GBK-encoded
        db.get_insert_news('News', str(url), title, date)
def yzb():
    page = urllib2.urlopen("http://yzb.hit.edu.cn").read()
    d = re.findall(r"""class="date">\d\d-\d\d</div> """, page, re.M | re.X)
    m = re.findall(r"""/article/list/view/id/ \S* \s* title=' \S*\s*\S* '> """, page, re.M | re.X)
    # The date filter had been left pinned to "2014/09/19" during testing;
    # restored to the current date here.
    T = time.strftime('%Y/%m/%d', time.localtime(time.time()))
    URL = []
    for x in db.get_all('News'):
        URL.append(x.url)
    for j, i in enumerate(m):
        date = "2014/" + d[j][13:-6].replace('-', '/')  # listing shows no year
        if date != T:
            continue
        url = "http://yzb.hit.edu.cn" + i[0:25]
        if url in URL:
            continue
        URL.append(url)
        k = i.split("title='")
        title = k[1][0:-2]
        db.get_insert_news('News', str(url), title, date)
def hqjt():
    m = re.findall(r"""/news/show.asp\?id=   #prefix
                       \d\d\d\d\s            #id
                       target="_blank">
                       \S*                   #title
                       </a> \s <span \s style="color:\#aaa;font-size:10px;">
                       \d\d-\d\d </span>""",
                   urllib.urlopen("http://hqjt.hit.edu.cn/").read(), re.M | re.X)
    T = time.strftime('%Y/%m/%d', time.localtime(time.time()))
    URL = []
    for x in db.get_all('News'):
        URL.append(x.url)
    for i in m:
        date = "2014/" + i[-12:-7].replace('-', '/')  # listing shows no year
        if date != T:
            continue
        l = i.split(' target="_blank">')
        url = "http://hqjt.hit.edu.cn" + l[0]
        if url in URL:
            continue
        URL.append(url)
        r = l[1].split("</a>")
        title = r[0].decode('gbk').encode('utf8')  # page is GBK-encoded
        db.get_insert_news('News', str(url), title, date)
def ygb():
    # Fetch the page once and run both patterns over the same snapshot.
    page = urllib.urlopen("http://ygb.hit.edu.cn").read()
    m = re.findall(r"""/news/show \S* \s\s* \S*\s*\S*\s*\S*\s*\S*" >""", page, re.M | re.X)
    mm = re.findall(r""" \(\s \S* \s*\)""", page, re.M | re.X)
    T = time.strftime('%Y/%m/%d', time.localtime(time.time()))
    URL = []
    for x in db.get_all('News'):
        URL.append(x.url)
    for j, i in enumerate(m):
        date = mm[j][2:-2].replace('-', '/')
        if date != T:
            continue
        l = i.split(" title=")
        url = "http://ygb.hit.edu.cn" + l[0][0:-1]
        if url in URL:
            continue
        URL.append(url)
        title = l[1][1:-2]
        db.get_insert_news('News', str(url), title, date)
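# Every scraper above repeats the same bookkeeping: collect known URLs,
# keep only today's items, skip duplicates, insert. A hypothetical helper
# capturing that shared pattern (none of the scrapers call it; it reuses
# the existing db API) could look like:
def save_if_new(url, title, date):
    T = time.strftime('%Y/%m/%d', time.localtime(time.time()))
    if date != T:
        return False  # not today's news
    for x in db.get_all('News'):
        if x.url == url:
            return False  # already stored
    db.get_insert_news('News', str(url), title, date)
    return True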
def POST(self):
    str_xml = web.data()                # raw XML posted by the WeChat server
    xml = etree.fromstring(str_xml)     # parse the message
    msgType = xml.find("MsgType").text
    fromUser = xml.find("FromUserName").text
    toUser = xml.find("ToUserName").text
    if msgType == "event":
        mscontent = xml.find("Event").text
        if mscontent == "subscribe":
            reply = u'''您好,欢迎关注工大新闻!\n输入-a[空格][关键字]来新增关键字,输入-d[空格][关键字]来删除一条已经添加的关键字,输入-s来查看当前订阅的关键字,输入-g xxxx/xx/xx来查看在xxxx年xx月xx日的相关新闻,输入news来查看当天的相关新闻,如果您有什么意见,请输入 -f[空格]反馈信息 来提交意见。'''
            return self.render.reply_text(fromUser, toUser, int(time.time()), reply)
        if mscontent == "unsubscribe":
            reply = u'''我知道当前的功能很简单,但是我会慢慢改进,欢迎以后再来!'''
            return self.render.reply_text(fromUser, toUser, int(time.time()), reply)
    elif msgType == "text":
        content = xml.find("Content").text  # the command the user typed
        if content == "-h":
            reply = u'''输入-a[空格][关键字]来新增关键字,输入-d[空格][关键字]来删除一条已经添加的关键字,输入-s来查看当前订阅的关键字,输入-g xxxx/xx/xx来查看在xxxx年xx月xx日的相关新闻,输入news来查看当天的相关新闻,如果您有什么意见,请输入 -f[空格]反馈信息 来提交意见。示例:-a 哈工大。'''
            return self.render.reply_text(fromUser, toUser, int(time.time()), reply)
        elif content == "-s":
            # List this user's subscribed keywords.
            keyword = []
            for user in db.get_all('User'):
                if user.openID == fromUser:
                    keyword.append(user.keyword)
            keyword = db.unique(keyword)
            ret = ""
            for key in keyword:
                ret += key + " "
            return self.render.reply_text(fromUser, toUser, int(time.time()), ret)
        elif content == "news":
            # Today's news matching any subscribed keyword.
            keyword = []
            for user in db.get_all('User'):
                if user.openID == fromUser:
                    keyword.append(user.keyword)
            keyword = db.unique(keyword)
            ret = ""
            T = time.strftime('%Y/%m/%d', time.localtime(time.time()))
            for x in db.get_all('News'):
                if x.date != T:
                    continue
                for key in keyword:
                    if x.title.find(key) != -1:
                        ret += x.title + "\n" + x.url + "\n"
                        break
            if ret == "":
                return self.render.reply_text(fromUser, toUser, int(time.time()), "今天到目前为止还没有与您订阅关键字有关的新闻,请持续关注。")
            return self.render.reply_text(fromUser, toUser, int(time.time()), ret)
        elif content.find("-a") == 0:
            # Subscribe to a keyword (at most 10 per user).
            tmp = content[3:]
            cnt = 0
            for user in db.get_all('User'):
                if user.keyword == tmp and user.openID == fromUser:
                    return self.render.reply_text(fromUser, toUser, int(time.time()), "您已经关注了此关键字了呀!")
                if user.openID == fromUser:
                    cnt += 1
            if cnt >= 10:
                return self.render.reply_text(fromUser, toUser, int(time.time()), "关键词的上限为10个,请使用 -d[空格]关键字 来删除一些关键字吧~")
            db.get_insert('User', fromUser, tmp)
            return self.render.reply_text(fromUser, toUser, int(time.time()), "插入成功。")
        elif content.find("-d") == 0:
            # Unsubscribe from a keyword.
            tmp = content[3:]
            flag = 0
            for user in db.get_all('User'):
                if user.keyword == tmp and user.openID == fromUser:
                    db.get_delete('User', "id=" + str(user.id))
                    flag = 1
            if flag == 1:
                return self.render.reply_text(fromUser, toUser, int(time.time()), "删除成功。")
            return self.render.reply_text(fromUser, toUser, int(time.time()), "您并没有订阅此关键字。")
        elif content.find("-f") == 0:
            # Store user feedback.
            tmp = content[3:]
            fktime = time.strftime('%Y/%m/%d %H:%M', time.localtime())
            db.get_insert_feedback('feedback', fromUser, fktime, tmp)
            return self.render.reply_text(fromUser, toUser, int(time.time()), "您的反馈信息已经提交,感谢您的支持!")
        elif content.find("-g") == 0:
            # News on a user-supplied date matching the subscribed keywords.
            keyword = []
            for user in db.get_all('User'):
                if user.openID == fromUser:
                    keyword.append(user.keyword)
            keyword = db.unique(keyword)
            T = content[3:]
            ret = ""
            for x in db.get_all('News'):
                if x.date != T:
                    continue
                for key in keyword:
                    if x.title.find(key) != -1:
                        ret += x.title + "\n" + x.url + "\n"
                        break
            if ret == "":
                return self.render.reply_text(fromUser, toUser, int(time.time()), "当天没有与您关键字相关的新闻,或者请查看命令输入格式是否正确,例如-g 2014/10/26。注意当前只能查询到10月28日以后的一些新闻。")
            return self.render.reply_text(fromUser, toUser, int(time.time()), ret)
        else:
            return self.render.reply_text(fromUser, toUser, int(time.time()), "没有这样的命令!请输入 -h 来查看帮助。")
    # Fall through for unhandled message/event types: the original returned an
    # undefined `ret` here, which raised a NameError; reply with nothing instead.
    return self.render.reply_text(fromUser, toUser, int(time.time()), "")
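# self.render.reply_text(...) above is a web.py template that is not part of
# this file. Assuming it renders the standard WeChat passive text-reply XML,
# an equivalent plain-Python sketch would be (note the handler passes
# fromUser first, since the reply's ToUserName is the original sender):
def reply_text_xml(to_user, from_user, create_time, content):
    return (u"<xml>"
            u"<ToUserName><![CDATA[%s]]></ToUserName>"
            u"<FromUserName><![CDATA[%s]]></FromUserName>"
            u"<CreateTime>%d</CreateTime>"
            u"<MsgType><![CDATA[text]]></MsgType>"
            u"<Content><![CDATA[%s]]></Content>"
            u"</xml>") % (to_user, from_user, create_time, content)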
def sogou(Key, Site):
    user = []
    tit = []
    ur = []
    for keyword in Key:
        for site in Site:
            # Quoted-phrase sogou search restricted to one site.
            url = ('http://www.sogou.com/web?q=' + my_urlencode(keyword)
                   + '&query=%22' + my_urlencode(keyword) + '%22++site%3A' + site
                   + '&fieldtitle=&fieldcontent=&fieldstripurl=&bstype=&ie=utf8'
                   + '&include=checkbox&sitequery=' + site
                   + '&located=0&tro=on&filetype=&num=10')
            target_urls = [url]
            page_num = 10
            URL = []
            for x in db.get_all('news'):
                URL.append(x.url)
            for cnt in range(page_num):
                if target_urls[cnt] == "END_FLAG":
                    break
                res_lists, next_page_url = get_res_list_and_next_page_url(target_urls[cnt])
                if next_page_url:  # there may be no "next page" link
                    target_urls.append(next_page_url)
                else:
                    target_urls.append("END_FLAG")
                for index in range(len(res_lists)):
                    # Title and URL of this search result.
                    news, url = get_title_and_url(res_lists[index])
                    # Skip boilerplate pages and department sub-sites.
                    if news == 'Sheet1' or news == '后勤集团' or news.find('综合信息服务') != -1:
                        continue
                    if url.find('depart') != -1:
                        continue
                    if url in URL:
                        continue
                    URL.append(url)
                    # Stop paging once results get older than yesterday.
                    date = get_date(res_lists[index])
                    day = time.strftime('%Y/%m/%d', time.localtime(time.time()))
                    j = day.split('/')
                    today = int(j[0]) * 600 + int(j[1]) * 40 + int(j[2])
                    if date < today - 1:
                        target_urls[cnt + 1] = "END_FLAG"
                        break
                    # Abstract and thumbnail, then store the item.
                    abstract = get_abstract(res_lists[index])
                    photo = get_photo(res_lists[index])
                    db.insert_news(url, news, photo, date, keyword)
                    # Remember which users should be notified about this item.
                    for x in db.get_all('keyword'):
                        if x.word == keyword and x.push_id not in user:
                            user.append(x.push_id)
                            tit.append(news + '$' + url)
                            ur.append(url)
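# my_urlencode() is defined elsewhere in this project. Assuming it simply
# percent-encodes the UTF-8 bytes of a keyword for the query string above,
# a minimal sketch would be:
def my_urlencode(word):
    return urllib.quote(word)  # e.g. '哈工大' -> '%E5%93%88%E5%B7%A5%E5%A4%A7'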
def POST(self):
    S = web.input()
    push_id = urllib.unquote(S.get('id'))
    word = urllib.unquote(S.get('word'))
    op = S.get('op')
    if op == '0':
        # Latest 10 news items matching the user's keywords (all news if none).
        Key = []
        for user in db.get_all('keyword'):
            if user.push_id == push_id:
                Key.append(user.word)
        ans = []
        for news in db.get_all('news'):
            if len(Key) == 0:
                ans.append(news)
                continue
            for x in Key:
                if news.title.find(x) != -1 or news.word == x:
                    ans.append(news)
                    break
        ret = ""
        cnt = 0
        for news in ans[::-1]:
            cnt += 1
            ret += news.url + '$' + news.title + '$' + news.image + '$'
            if cnt == 10:
                break
        return urllib.quote(ret.encode('utf8'))
    elif op == '4':
        # Page through matching news, 10 at a time, starting after `word`.
        Key = []
        for user in db.get_all('keyword'):
            if user.push_id == push_id:
                Key.append(user.word)
        ans = []
        for news in db.get_all('news'):
            if len(Key) == 0:  # no keywords: include everything (the original
                ans.append(news)  # empty-Key check sat inside the loop and never ran)
                continue
            for x in Key:
                if news.title.find(x) != -1 or news.word == x:
                    ans.append(news)
                    break
        ret = ""
        if word == 'new':
            cnt = 0
            for news in ans[::-1]:
                cnt += 1
                ret += news.url + '$' + news.title + '$' + news.image + '$'
                if cnt == 10:
                    break
        else:
            cnt = 0
            flag = 0
            for news in ans[::-1]:
                if news.url == word:  # resume right after this URL
                    flag = 1
                    continue
                if flag == 0:
                    continue
                if cnt == 10:
                    break
                cnt += 1
                ret += news.url + '$' + news.title + '$' + news.image + '$'
        return urllib.quote(ret.encode('utf8'))
    elif op == '1':
        # Add a keyword subscription.
        for user in db.get_all('keyword'):
            if user.push_id == push_id and user.word == word:
                return "failed"  # already subscribed
        db.insert_keyword(push_id, word)
        return "succeed"
    elif op == '2':
        # Remove a keyword subscription.
        for user in db.get_all('keyword'):
            if user.push_id == push_id and user.word == word:
                db.delete_keyword("id=" + str(user.id))
                return "succeed"
        return "failed"
    elif op == '3':
        # `word` holds the '$'-separated sites the user wants to keep;
        # every other known site is stored as an excluded source.
        Site = ['today.hit.edu.cn', 'news.hit.edu.cn', 'hqjt.hit.edu.cn',
                'hituc.hit.edu.cn', 'jwc.hit.edu.cn', 'acm.hit.edu.cn',
                'studyathit.hit.edu.cn', 'yzb.hit.edu.cn', 'zsb.hit.edu.cn',
                'ygb.hit.edu.cn', 'www.usp.com.cn']
        for user in db.get_all('URL'):
            if user.push_id == push_id:
                db.delete_URL("id=" + str(user.id))
        if word == "":
            return "failed"
        l = word[0:-1].split('$')  # drop the trailing '$'
        for x in Site:
            if l.count(x) == 0:
                db.insert_URL(x, push_id)
        return "succeed"
    elif op == '5':
        # Store feedback.
        fktime = time.strftime('%Y/%m/%d %H:%M', time.localtime())
        db.insert_feedback(push_id, fktime, word)
        return "succeed"
    elif op == '6':
        # Drop all of this user's keywords.
        for user in db.get_all('keyword'):
            if user.push_id == push_id:
                db.delete_keyword("id=" + str(user.id))
        return "succeed"
    return "failed"  # unknown op
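# A minimal client-side sketch of how the op-based API above can be called.
# The endpoint URL is a placeholder; op='1' subscribes push_id to a keyword,
# and the handler answers "succeed" or "failed":
def subscribe_keyword(endpoint, push_id, word):
    data = urllib.urlencode({'id': push_id, 'word': word, 'op': '1'})
    return urllib2.urlopen(endpoint, data).read()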
def clear():
    for x in db.get_all('News'):
        db.get_delete('News', "id=" + str(x.id))