示例#1
0
def Notification_sdcs(url, page, start=1, end=5):
    """Send a message for every SDCS news item that has not been seen yet.

    For every even ``i`` in ``range(start, end)`` the page at ``page`` is
    downloaded and the element whose ``id`` is ``url`` with ``i`` appended is
    looked up.  Each ``<li>`` of that element's first ``<ul>`` is treated as
    one news item, keyed by the ``title`` attribute of its first ``<a>`` (or
    by the link text when the attribute is missing).  Keys not yet present in
    the module-level ``sdcs_dict`` are stored there together with the text of
    the item's first ``<span>`` and announced via ``iMessage.send_Message``.

    Compared with the previous implementation, the HTTP response is now
    closed, list entries without a link are skipped, and a failure while
    handling an item is no longer followed by a second send attempt keyed by
    the link text; it is reported by the per-index handler instead.

    Args:
        url: Prefix of the element ids to scan; the loop index is appended.
        page: Address of the page to download.
        start: First index of the scan range (inclusive).
        end: End of the scan range (exclusive).

    Returns:
        None.  Prints "No News From SDCS!" when nothing new was announced.
    """
    id_template = url + "%s"
    found_news = False
    for i in range(start, end):
        # Only even indices are scanned.
        if i % 2 != 0:
            continue
        try:
            tagpage = id_template % i
            req = urllib.request.Request(url=page, headers=headers)
            # The context manager closes the HTTP response once it is read.
            with urllib.request.urlopen(req) as response:
                raw_html = response.read()
            read_soup = BeautifulSoup(raw_html,
                                      "html.parser",
                                      from_encoding="utf-8")
            search_list = read_soup.find_all(id=tagpage)[0]
            dl = search_list("ul")[0]
            for li in dl("li"):
                anchor = li.find("a")
                if anchor is None:
                    # Nothing to announce for an entry without a link.
                    continue
                # Prefer the title attribute; fall back to the link text.
                has_title = anchor.has_attr("title")
                key = anchor["title"] if has_title else anchor.get_text()
                if key in sdcs_dict:
                    continue
                # Debug markers kept from the original: "hehe" for items
                # keyed by title, "xixi" for items keyed by link text.
                print("hehe" if has_title else "xixi")
                sdcs_dict[key] = li.find_all("span")[0].get_text()
                iMessage.send_Message(News=key, sub='News From SDCS')
                found_news = True
        except Exception:
            # Best effort: report the failure and move on to the next index.
            print(traceback.format_exc())
    if not found_news:
        print("No News From SDCS!")
示例#2
0
def JudgeNews():
    """Mail every notice whose title is new, then schedule the next poll.

    The links returned by ``getSWREgnews()`` are compared by title against
    the module-level ``NewsTitle`` collection.  For each unknown title the
    linked article is downloaded and parsed with ``Article`` and its text and
    URL are sent through ``iMessage.send_Message``.  If anything new was
    sent, ``NewsTitle`` is cleared and refilled with the titles of the
    current links.

    The function re-arms itself with a 300-second ``Timer``.  The timer is
    started in a ``finally`` clause so that a failed fetch, download or send
    does not end polling.  When such a failure happens, the titles that were
    already mailed in this run are added to ``NewsTitle`` so the next run
    does not mail them again; the exception is then re-raised.

    Returns:
        None.  Prints ``NewsTitle`` after a successful run.
    """
    sent_titles = []
    try:
        links = getSWREgnews()  # fetch the current list of notice links
        for link in links:
            title = link.get('title')
            if title in NewsTitle:
                continue
            url = 'http://www.swre.cugb.edu.cn/' + link.get('href')
            content = Article(url, language='zh')
            content.download()
            content.parse()
            # Mail the notices that are not yet recorded in NewsTitle.
            iMessage.send_Message(News=content.text + "\n" + url, sub='水环通告:' + title)
            sent_titles.append(title)
        if sent_titles:
            # Something new was seen: rebuild NewsTitle from the current links.
            NewsTitle.clear()
            NewsTitle.extend(link.get('title') for link in links)
    except Exception:
        # Remember what was already mailed so it is not mailed again.
        NewsTitle.extend(sent_titles)
        raise
    finally:
        # Run this function again in 300 seconds, even after a failure.
        t = Timer(300, JudgeNews)
        t.start()
    print(NewsTitle)
示例#3
0
class Crawl(object):
    """Crawler built from a URL manager, a downloader, a parser and an outputer."""

    def __init__(self):
        self.urls = url_manager.UrlManager()
        self.downloader = html_downloader.HtmlDownloader()
        self.parser = html_parser.HtmlParser()
        self.outputer = html_outputer.HtmlOutputer()

    def craw(self, root_urls):
        """Crawl from each root URL and send the collected data as one message.

        Every root URL is added to the URL manager, which is then drained:
        each pending URL is downloaded and parsed, the URLs found by the
        parser are queued and the parsed data is handed to the outputer.  A
        failure on a single URL prints 'craw failed!' and the loop goes on.
        Afterwards every value of every mapping returned by
        ``self.outputer.output_html()`` is joined, one per line, and sent via
        ``iMessage.send_Message`` unless the result is empty.

        Args:
            root_urls: Iterable of URLs to start crawling from.
        """
        crawled = 1
        for seed in root_urls:
            self.urls.add_new_url(seed)
            while self.urls.has_new_url():
                try:
                    target = self.urls.get_new_url()
                    print('craw %d : %s' % (crawled, target))
                    page = self.downloader.download(target)
                    found_urls, found_data = self.parser.parse(target, page)
                    self.urls.add_new_urls(found_urls)
                    self.outputer.collect_data(found_data)
                    crawled += 1
                except Exception:
                    # Keep going with the remaining URLs.
                    print('craw failed!')
        records = self.outputer.output_html()
        # One line per value, in the iteration order of each record.
        lines = [record[key] + '\n' for record in records for key in record]
        News = ''.join(lines)
        if News:
            iMessage.send_Message(News, 'CQUT_News')
示例#4
0
def JudgeNews():
    """Mail every graduate-school notice with a new title, then re-arm the poll.

    ``getCUGBgranews()`` returns three lists of links.  They are walked row
    by row (first entry of each list, then the second, and so on).  For each
    link whose title is not in the module-level ``NewsTitle`` collection, the
    article is downloaded and parsed with ``Article`` and mailed through
    ``iMessage.send_Message`` with the subject prefix of its list.  If
    anything new was sent, ``NewsTitle`` is cleared and refilled with all
    current titles in the same row-by-row order.

    Differences from the previous implementation:
      * ``zip_longest`` replaces ``zip`` so that entries beyond the length of
        the shortest list are no longer silently ignored.
      * The 300-second ``Timer`` is started in a ``finally`` clause so that a
        failed fetch, download or send does not end polling.  On failure the
        titles already mailed in this run are added to ``NewsTitle`` so they
        are not mailed again, and the exception is re-raised.

    Returns:
        None.
    """
    from itertools import zip_longest

    # Subject prefixes, in the same order as the three lists.
    subject_prefixes = (
        '📌研院·培养与学籍:📢 ',
        '📌研院·学科与学位:📢 ',
        '📌研院·通知公告:📢 ',
    )
    sent_titles = []
    try:
        links1, links2, links3 = getCUGBgranews()  # fetch the three link lists
        # Missing cells of shorter lists are filled with None and skipped.
        rows = list(zip_longest(links1, links2, links3))
        for row in rows:
            for link, prefix in zip(row, subject_prefixes):
                if link is None:
                    continue
                title = link.get('title')
                if title in NewsTitle:
                    continue
                url = 'https://www1.cugb.edu.cn/' + link.get('href')
                content = Article(url, language='zh')
                content.download()
                content.parse()
                # Mail the notices that are not yet recorded in NewsTitle.
                iMessage.send_Message(News=content.text + "\n" + url,
                                      sub=prefix + title)
                sent_titles.append(title)
        if sent_titles:
            # Something new was seen: rebuild NewsTitle from the current links.
            NewsTitle.clear()
            NewsTitle.extend(link.get('title')
                             for row in rows
                             for link in row
                             if link is not None)
    except Exception:
        # Remember what was already mailed so it is not mailed again.
        NewsTitle.extend(sent_titles)
        raise
    finally:
        # Run this function again in 300 seconds, even after a failure.
        t = Timer(300, JudgeNews)
        t.start()
示例#5
0
    elif index == 3:
        news = "\n\n【研院通知·学位与学科】" + "\n"
        for i in links:
            No = No + 1
            news =  news + "\n" + str(No) + "、" + i.get('title') + \
                    "  ☛" + "https://www1.cugb.edu.cn/" + i.get('href')
    elif index == 4:
        news = "\n\n【研院通知·通知公告】" + "\n"
        for i in links:
            No = No + 1
            news =  news + "\n" + str(No) + "、" + i.get('title') + \
                    "  ☛" + "https://www1.cugb.edu.cn/" + i.get('href')
    elif index == 5:
        news = "\n\n【水环通知】" + "\n"
        for i in links:
            No = No + 1
            news =  news + "\n" + str(No) + "、" + i.get('title') + \
                    "  ☛" + "http://www.swre.cugb.edu.cn/" + i.get('href')
    return news


if __name__ == "__main__":
    # Fetch the digest sections in the order they appear in the mail.
    digest_sections = (
        getCUGBznews(),
        getCUGBgnews(),
        getCUGBdnews(),
        getCUGBnotice(),
        getSWREnotice(),
    )
    news = digest_sections[0]
    for section in digest_sections[1:]:
        news = news + section
    # With no time tuple, strftime formats the current local time.
    date = time.strftime("(%Y%m%d)")
    iMessage.send_Message(News=news, sub='地大研院今日要闻' + date)