示例#1
0
def sendLogJob():
    """Email yesterday's log file if sending is enabled in the config.

    Reads the ``email/isSend`` flag from the config; when it is '1',
    sends the log file named after yesterday's date from the project's
    ``log`` directory. Best-effort job: any failure is logged, never
    raised (this runs inside a scheduler).
    """
    try:
        if ConfigHelper.Get(configPath, 'email', 'isSend') == '1':
            # Log files are named after the date; yesterday's file is
            # the one that is complete by the time this job runs.
            yesterday = (datetime.datetime.now() +
                         datetime.timedelta(days=-1)).date()
            logFile = os.path.join(
                os.path.split(sys.path[0])[0], 'log', str(yesterday))
            SendEmail.Send(configPath, '每天的日志文件', logFile)
            log.Log('Email发送成功:%s' % datetime.datetime.now())
    except Exception:
        # Narrowed from a bare `except:` so SystemExit and
        # KeyboardInterrupt still propagate; real errors are logged.
        log.Log(traceback.format_exc())
示例#2
0
 def SvcStop(self):
     """Handle the SCM stop request.

     Reports STOP_PENDING, signals the wait event so SvcDoRun's loop
     exits, and shuts the scheduler down without waiting for running
     jobs to finish.
     """
     # Let the Service Control Manager know a stop is in progress.
     self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
     # Wake the WaitForSingleObject call in SvcDoRun.
     win32event.SetEvent(self.hWaitStop)
     sched = self.scheduler
     # shutdown(False) == shutdown(wait=False): do not block on
     # currently executing jobs.
     if sched.running:
         sched.shutdown(False)
     log.Log('service stop')
示例#3
0
 def SvcDoRun(self):
     """Service main loop.

     Polls the stop event every 5 seconds; starts the scheduler the
     first time through and exits when SvcStop signals the event.
     The loop (rather than a single wait) is needed so the service
     can be started more than once.
     """
     # WAIT_OBJECT_0 means hWaitStop was signalled by SvcStop.
     while win32event.WaitForSingleObject(
             self.hWaitStop, 5000) != win32event.WAIT_OBJECT_0:
         if self.scheduler.running:
             continue
         log.Log('service start')
         # BlockingScheduler.start() blocks here until shutdown();
         # `running` is True from this point on.
         self.scheduler.start()
示例#4
0
 def __init__(self, args):
     """Initialize the Windows service and register the cron jobs.

     Creates the stop event used by SvcStop/SvcDoRun and schedules the
     two crawler jobs plus the daily log-email job.

     Args:
         args: service arguments forwarded to ServiceFramework.
     """
     win32serviceutil.ServiceFramework.__init__(self, args)
     # Auto-reset event; SvcStop signals it to end the run loop.
     self.hWaitStop = win32event.CreateEvent(None, 0, 0, None)
     # NOTE(review): BlockingScheduler.start() blocks the calling
     # thread — SvcDoRun depends on that; confirm before switching
     # to BackgroundScheduler.
     self.scheduler = BlockingScheduler(logger=None)
     log.Log('service init')
     # Read each config value once instead of repeating the same
     # ConfigHelper.Get call per add_job (original read quartz
     # hour/minute twice and email hour/minute once each inline).
     quartzHour = ConfigHelper.Get(configPath, 'quartz', 'hour')
     quartzMinute = ConfigHelper.Get(configPath, 'quartz', 'minute')
     emailHour = ConfigHelper.Get(configPath, 'email', 'hour')
     emailMinute = ConfigHelper.Get(configPath, 'email', 'minute')
     # Both crawler jobs intentionally share the quartz schedule.
     self.scheduler.add_job(crawlerJob, 'cron',
                            hour=quartzHour, minute=quartzMinute)
     self.scheduler.add_job(crawlerNoticeJob, 'cron',
                            hour=quartzHour, minute=quartzMinute)
     self.scheduler.add_job(sendLogJob, 'cron',
                            hour=emailHour, minute=emailMinute)
示例#5
0
def crawlerJob():
    """Crawl the news list pages of www5.ncwu.edu.cn into the News table.

    Walks list pages 1..totalPage (from config), parses every article
    link on each page, fetches the article body and inserts it when its
    article id is not already stored. Per-item failures are logged and
    skipped so one bad article does not abort the whole crawl.
    """
    # Raise the recursion limit for BeautifulSoup on deeply nested HTML.
    # NOTE(review): 1000000000 far exceeds any usable C stack (a value
    # like 10000 would suffice) — kept to preserve existing behavior.
    sys.setrecursionlimit(1000000000)
    log = LogHelper('-%s' % sys._getframe().f_code.co_name)
    totalPage = ConfigHelper.Get(configPath, 'base', 'totalPage')
    dbHelper = DbHelper(configPath, 'News')
    log.Log('%s start' % sys._getframe().f_code.co_name)
    # Bug fix: the original pattern r'(\d+).html' left the dot
    # unescaped, matching any character before "html". Compiled once,
    # reused for every link.
    idPattern = re.compile(r'(\d+)\.html')
    page = 1
    while page <= int(totalPage):
        if page != 1:
            url = 'http://www5.ncwu.edu.cn/channels/4_%s.html' % page
        else:
            url = 'http://www5.ncwu.edu.cn/channels/4.html'
        html = urllib.urlopen(url)
        soup = bs4.BeautifulSoup(html, 'html.parser')
        if soup is None:
            log.Log('没有获取到列表页html文档内容:%s' % url)
            # Bug fix: the original `continue` here never advanced
            # `page`, retrying the same URL forever.
            page += 1
            continue
        div = soup.find('div', class_='xinxilist')
        ul = div.find('ul') if div is not None else None
        if ul is None:
            # Guard: the original crashed with AttributeError when the
            # expected list markup was missing.
            log.Log('没有获取到列表页html文档内容:%s' % url)
            page += 1
            continue
        for li in ul.find_all('li'):
            anchor = li.find('a')
            href = anchor['href']
            date = li.find('i').get_text()
            match = idPattern.search(href)
            if match is None:
                # Guard: the original called .groups() on None when a
                # link carried no numeric id.
                log.Log('没有获取到列表页html文档内容:%s' % href)
                continue
            # Renamed from `id` (shadowed the builtin); group(1) is the
            # correct call — the original used groups(1)[0].
            articleId = match.group(1)
            title = anchor.get_text()
            try:
                time.sleep(1)  # throttle: be polite to the server
                html = urllib.urlopen(href)
                contentHtml = bs4.BeautifulSoup(html.read(), 'html.parser')
            except Exception:
                log.Log(traceback.format_exc() + ' 获取内容页HTML文档内容异常:%s' % href)
                continue

            if contentHtml is None:
                log.Log('没有获取到内容页HTML文档内容:%s' % href)
                continue
            content = contentHtml.find('div', attrs={'align': 'left'})
            if content is None:
                log.Log('没有获取到内容页div{align: left}内容:%s' % href)
                continue
            # Climb to the enclosing centered container for the full body.
            content = content.find_parent('div', attrs={'align': 'center'})
            if content is None:
                log.Log('没有获取到内容页div{align: center}内容:%s' % href)
                continue

            try:
                res = dbHelper.Select(articleId)
                # Insert only when this article id is not stored yet.
                if res.fetchone() is None:
                    dbHelper.Add([{
                        'articleid': articleId.strip(),
                        'href': href.strip(),
                        'Title': title.strip(),
                        'date': date.strip(),
                        'NeiRong': content.prettify(),
                        'LeiXing': 0,
                        'UserID': 1
                    }])
            except Exception:
                # TODO: retry instead of skipping (original note: 应该是重试)
                log.Log(traceback.format_exc() + '数据库操作异常:%s' % href)
        log.Log('page=%s' % page)
        page += 1

    log.Log('%s ok' % sys._getframe().f_code.co_name)