예제 #1
0
파일: webservice.py 프로젝트: cash2one/ZPB
 def stoploop():
     """Stop the IO loop once it is idle or the deadline has passed.

     While the deadline is still ahead and the loop has pending callbacks
     or timeouts, this check re-schedules itself one second later.
     """
     current = time.time()
     # The loop internals are only inspected while the deadline is ahead.
     if current >= deadline or not (io_loop._callbacks
                                    or io_loop._timeouts):
         io_loop.stop()
         logger.info('Http Server shutdown')
         return
     io_loop.add_timeout(current + 1, stoploop)
예제 #2
0
def DoResumePayDown(companyid, siteid, syncid, fromsitecode, ownpay, taskid):
    """Download one paid resume and record the outcome on its sync task.

    The site handler is built for ``companyid`` when ``ownpay`` is ``'T'``;
    otherwise company id ``0`` is used. After ``resume_searcher_down`` runs,
    the ``ResSyncDistribute`` row for ``syncid`` gets status 2010 on
    success, or status 10 plus the handler message on failure.

    A ``BaseError`` raised along the way is handled here: the ``DBTask`` row
    for ``syncid`` is marked failed (status 10, ``fail_num`` incremented,
    ``log_info`` set to the error message). Nothing is returned.
    """
    try:
        if ownpay == 'T':
            handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
        else:
            # Company id of the shared (common) account
            common_companyid = 0
            handler = SiteConfig.GetTaskHandler(common_companyid, siteid,
                                                taskid)
        # The account failed verification
        if handler.bind.check_status == 10:
            raise CompanyAccountInvalidError(companyid, siteid, u'账号登录失败')
        try:
            handler.resume_searcher_down(fromsitecode, syncid, companyid)
            # Write the task status back
            task = ResSyncDistribute.queryWithId(syncid)
            if HANDLE_STATUS.SUCCESS == handler.status:
                task.sync_status = 2010
                # NOTE(review): the format string has two placeholders, so
                # the third argument (handler.message) is never rendered.
                logger.info(u'<{}>下载付费简历<{}>成功!'.format(
                    handler.name, fromsitecode, handler.message))
            else:
                task.sync_status = 10
                task.error_message = handler.message
                logger.error(u'<{}>下载付费简历<{}>失败,原因:{}'.format(
                    handler.name, fromsitecode, handler.message))
            task.save()
        except BaseException as e:
            # Everything raised by the download or the write-back is wrapped;
            # presumably UnHandleRuntimeError derives from BaseError so the
            # outer handler records it - TODO confirm.
            raise UnHandleRuntimeError(e)
    except BaseError as e:
        # NOTE(review): the success path loads the row through
        # ResSyncDistribute, this path through DBTask, with the same syncid -
        # confirm that both models are meant to be addressed by that id.
        task = DBTask.queryWithId(syncid)
        task.sync_status = 10
        task.fail_num += 1
        task.log_info = e.message
        task.save()
예제 #3
0
def CheckResumeSearchDownStat(taskid):
    key = Conf.RESUME_IMPORT_HKEY % taskid
    stat = ResumeRedisCli.hgetall(key)
    if stat:
        # finish
        finish = int(stat.get('finish', '0'))
        if finish:
            # 统计信息
            total = int(stat['total'])
            grab = int(stat['grab'])
            succ = int(stat['success'])
            ignore = int(stat['ignore'])
            fail = int(stat['failure'])
            # 来源信息
            siteid = int(stat['siteid'])
            importid = stat['importid']
            companyid = int(stat['companyid'])
            imphistoryid = int(stat['imphistoryid'])
            #
            task = ResSyncDistribute.queryWithId(importid)
            if task:
                if total > 0 and total <= succ + ignore + fail:
                    if total <= succ + ignore + fail:
                        if ResumeRedisCli.delete(key):
                            imp = ImpHistory.queryByHistoryId(imphistoryid)
                            if imp:
                                imp.succ_num = succ
                                imp.fail_num = fail
                                imp.end_time = datetime.today()
                                imp.proc_status = 1
                                if imp.succ_num == 0:
                                    imp.is_valid = 'F'
                                imp.save()
                            # 更改简历状态
                            ResumeBase.changeStat(task.resume_code)
                            # 回写任务状态
                            msg = u'<{}>下载付费简历总数<{}>,下载数<{}>,成功数<{}>,重复数<{}>,失败数<{}>.'.format(
                                SiteConfig.getSiteNameById(siteid), total,
                                grab, succ, ignore, fail)
                            task.sync_status = 20
                            task.error_message = ''
                            task.process_time = datetime.today()
                            task.save()
                            logger.info(msg)
                            dtc. async ('zpb.service.handleservice.DoInfo',
                                        *(companyid, siteid, taskid))
                elif total == 0:
                    if ResumeRedisCli.delete(key):
                        ImpHistory.removeByHistoryId(imphistoryid)
                        msg = u'<{}>下载付费简历总数:0'.format(
                            SiteConfig.getSiteNameById(siteid))
                        task.sync_status = 20
                        task.error_message = ''
                        task.process_time = datetime.today()
                        task.save()
                        logger.info(msg)
예제 #4
0
파일: app.py 프로젝트: cash2one/ZPB
 def stop(self):
     """Signal the sentinel process to stop and wait until it has exited.

     Returns ``False`` without doing anything when the sentinel is not
     alive; otherwise sets the stop event, joins the sentinel, drops both
     event references and returns ``True``.
     """
     if self.sentinel.is_alive():
         logger.info('Application-{} stopping.'.format(self.pid))
         self.stop_event.set()
         self.sentinel.join()
         logger.info('Application-{} has stopped.'.format(self.pid))
         self.start_event = self.stop_event = None
         return True
     return False
예제 #5
0
파일: app.py 프로젝트: cash2one/ZPB
 def stop(self):
     """Set ``event_out`` and block until cluster and webapper have exited."""
     logger.info('{} stopping application processes'.format(
         current_process().name))
     # Stopping pusher
     self.event_out.set()
     # Wait for each process in turn, polling every 100 ms
     for attr in ('cluster', 'webapper'):
         while getattr(self, attr).is_alive():
             sleep(0.1)
예제 #6
0
 def wait(self):
     """Keep polling (blocking) until the pool reports there is nothing left.

     The loop ends on the first ``NoResponsePending`` or
     ``NoWorkerAvailable`` raised by ``poll``; the reason is logged.
     """
     reason = None
     while reason is None:
         try:
             self.poll(True)
         except NoResponsePending:
             reason = u'Thread Pool stoped by NoResponsePending'
         except NoWorkerAvailable:
             reason = u'Thread Pool stoped by NoWorkerAvailable'
     logger.info(reason)
예제 #7
0
파일: app.py 프로젝트: cash2one/ZPB
 def start(self):
     """Spawn the Sentinel process and block until it signals it has started.

     Returns ``self.pid``.
     """
     self.stop_event, self.start_event = Event(), Event()
     self.sentinel = Process(
         target=Sentinel, args=(self.stop_event, self.start_event))
     self.sentinel.start()
     logger.info('Application-{} starting.'.format(self.pid))
     # Poll every 100 ms until the start event is set
     while True:
         if self.start_event.is_set():
             break
         sleep(0.1)
     return self.pid
예제 #8
0
def DoResumeSearcher(companyid, siteid, taskid, searcherid, **kwargs):
    """Run a saved resume search through the site handler.

    The searcher's import time is updated first. The search itself only
    runs when the bound account's ``check_status`` is 0 or 50; otherwise
    the handler message is set to a login-failure text. The handler
    message is logged either way. ``BaseError`` is ignored.
    """
    try:
        handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
        ResumeSearcher.updateImportTimeBySearcherId(searcherid)
        account_usable = handler.bind.check_status in [0, 50]
        if not account_usable:
            handler.message = u'绑定账号登录失败'
        else:
            handler.resume_search(searcherid)
        logger.info(handler.message)
    except BaseError:
        # Domain errors are deliberately ignored here
        return
예제 #9
0
def GetBaikLink(linkid, linkname):
    """Look up the hyperlink for a keyword and store it.

    The value returned by ``_getHyperLink`` is saved through
    ``HyperLink.appendLink`` with flag ``'F'`` when it contains the text
    ``'none'`` and ``'T'`` otherwise. An empty lookup result stores
    nothing. Saving stays best-effort: a failure is logged and does not
    propagate to the caller.
    """
    logger.info(u'正在处理关键词<{}>'.format(linkname))
    linkval = _getHyperLink(linkname)
    if not linkval:
        return
    try:
        flag = 'F' if 'none' in linkval else 'T'
        HyperLink.appendLink(linkid, linkval, flag)
    except Exception as e:
        # Only ordinary errors are absorbed (KeyboardInterrupt / SystemExit
        # now propagate), and the failure is logged.
        logger.error(e)
예제 #10
0
def Do20(companyid, siteid, syncid, taskid, **kwargs):
    """Run the handler's ``position_import`` step and record the result.

    Loads the ``DBTask`` row for ``syncid``, builds the site handler and
    refuses to continue when the account binding is no longer valid or
    failed verification. On success the binding is marked as logged in and
    two follow-up jobs (``DoInfo`` and ``Do201``) are queued. The task row
    is updated with the outcome and saved at the end, including when a
    ``BaseError`` was caught. Nothing is returned.
    """
    task = DBTask.queryWithId(syncid)
    try:
        handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
        # The account binding has been removed
        if handler.bind.is_valid != 'T':
            DBTask.newSchedule(companyid, siteid, False)
            raise CompanyAccountUnBindError(companyid, siteid, u'账号已解除绑定')
        # The account failed verification
        if handler.bind.check_status == 10:
            raise CompanyAccountInvalidError(companyid, siteid, u'账号登录失败')
        try:
            handler.position_import()
            if HANDLE_STATUS.SUCCESS == handler.status:
                task.sync_status = 2000
                task.succ_num += 1
                task.log_info = handler.message
                # Record the successful login on the account binding
                handler.bind.check_status = 50
                handler.bind.login_result = '登录成功'
                handler.bind.last_succ_time = datetime.today()
                handler.bind.save()
                # Queue the follow-up async tasks; each gets a uid derived
                # from taskid plus a fixed suffix
                infouid = md5(taskid + 'info')
                dtc. async ('zpb.service.handleservice.DoInfo',
                            *(companyid, siteid, infouid),
                            uid=infouid)
                resuid = md5(taskid + '201')
                dtc. async ('zpb.service.handleservice.Do201',
                            *(companyid, siteid, syncid, resuid),
                            uid=resuid)
                logger.info(handler.message)
            elif HANDLE_STATUS.AGAIN == handler.status:
                task.sync_status = 11
                task.fail_num += 1
                task.log_info = handler.message
                logger.error(handler.message)
            else:
                task.sync_status = 10
                task.fail_num += 1
                task.log_info = handler.message
                logger.error(handler.message)
        except BaseException as e:
            # Anything raised above is wrapped; presumably
            # UnHandleRuntimeError derives from BaseError so the outer
            # handler records it - TODO confirm.
            raise UnHandleRuntimeError(e)
    except BaseError as e:
        task.sync_status = 10
        task.fail_num += 1
        task.log_info = e.message
    task.save()
예제 #11
0
파일: app.py 프로젝트: cash2one/ZPB
 def guard(self):
     """Watch the dispatcher process until the stop event is set.

     Signals start-up through ``start_event``, then checks every half
     second whether the dispatcher is still alive and respawns it when it
     is not. Calls ``self.stop()`` once the stop event is set.
     """
     logger.info('{} guarding Application at {}'.format(
         current_process().name, self.pid))
     self.start_event.set()
     logger.info('Application-{} running.'.format(self.parent_pid))
     poll_interval = 0.5  # seconds between two liveness checks
     while True:
         if self.stop_event.is_set():
             break
         dispatcher_dead = not self.dispatcher.is_alive()
         if dispatcher_dead:
             self.dispatcher = self.spawn_dispatcher()
             logger.error(
                 'reincarnated dispatcher {} after sudden death'.format(
                     self.dispatcher.name))
         sleep(poll_interval)
     self.stop()
예제 #12
0
파일: webservice.py 프로젝트: cash2one/ZPB
def shutdown():
    """Stop the HTTP server, then stop the IO loop once it has drained.

    The IO loop is checked immediately and then once a second; it is
    stopped at the first check that finds no pending callbacks or timeouts,
    or once 60 seconds have passed.
    """
    grace_seconds = 60
    logger.info('Stopping Http Server')
    http_server.stop()
    logger.info('Http Server will shutdown in %s seconds ...', grace_seconds)
    io_loop = tornado.ioloop.IOLoop.instance()
    deadline = time.time() + grace_seconds

    def stoploop():
        current = time.time()
        # The loop internals are only inspected while the deadline is ahead.
        if current >= deadline or not (io_loop._callbacks
                                       or io_loop._timeouts):
            io_loop.stop()
            logger.info('Http Server shutdown')
            return
        io_loop.add_timeout(current + 1, stoploop)

    stoploop()
예제 #13
0
 def damaByUrl(self, dtype, url):
     """Have Dama2 solve the captcha at ``url`` and return its ``verify`` value.

     While the request is in flight a Redis key built from the URL (scheme,
     host and path) and, when an account is bound, from the binding's
     company id, site id and login name holds the URL. The key is deleted
     afterwards whether or not the call succeeded.
     """
     parts = urlparse(url)
     fmturl = parts.scheme + '://' + parts.hostname + parts.path
     if not self.bind:
         damakey = 'dama:{}'.format(fmturl)
     else:
         damakey = 'dama:{}:{}:{}:{}'.format(
             self.bind.company_id, self.bind.site_id, self.bind.login_name,
             fmturl)
     ResumeRedisCli.set(damakey, url)
     try:
         logger.info(u'<{}>进行验证码打码'.format(self.name))
         answer = Dama2().d2Url(dtype, url)
         logger.info(u'<{}>验证码打码完成'.format(self.name))
         return answer['verify']
     finally:
         ResumeRedisCli.delete(damakey)
예제 #14
0
파일: mailservice.py 프로젝트: cash2one/ZPB
def download_email(pop, emailconf, mid, uidl, taskid):
    """Retrieve message ``mid`` over POP3, parse it and remember its UIDL.

    A retrieval failure (``error_proto``) is logged and ends the call. A
    failure while parsing or while recording the UIDL in the mailbox's
    Redis set is logged as well; nothing is raised in either case and
    nothing is returned.
    """
    mailbox = emailconf.email_user
    logger.info(u'[*] 正在下载邮箱<{}>第<{}>封邮件'.format(mailbox, mid))
    try:
        _typ, lines, _octets = pop.retr(mid)
        msg = email.message_from_string('\n'.join(lines))
    except error_proto as e:
        logger.error(u'[*] 邮箱<{}>中的第<{}>封邮件下载失败,原因:{}'.format(
            mailbox, mid, e))
        return
    # Parse and save email content/attachments, then mark the UIDL as seen
    try:
        parse_email(emailconf, msg, mid, taskid)
        MailRedisCli.sadd(MAIL_SET_KEY % mailbox, uidl)
    except BaseException as e:
        logger.error(u'[*] 邮箱<{}>中的第<{}>封邮件解析失败,原因:{}'.format(
            mailbox, mid, e))
예제 #15
0
def CheckResumeSearchStat(taskid):
    """Close out a resume-search import once its Redis counters are done.

    Reads the import hash for ``taskid`` and does nothing unless it exists
    and its ``finish`` flag is non-zero. When ``success + ignore +
    failure`` has reached a positive ``total``, the import history row is
    completed, the binding's import time is updated and a summary is
    logged. When ``total`` is zero the history row is removed and the
    binding's import time is updated. Both paths require that the Redis
    hash was deleted successfully. Nothing is returned.
    """
    key = Conf.RESUME_IMPORT_HKEY % taskid
    stat = ResumeRedisCli.hgetall(key)
    if not stat or not int(stat.get('finish', '0')):
        return

    # Counters
    total = int(stat['total'])
    grab = int(stat['grab'])
    succ = int(stat['success'])
    ignore = int(stat['ignore'])
    fail = int(stat['failure'])
    # Origin of the import
    siteid = int(stat['siteid'])
    importid = stat['importid']
    companyid = int(stat['companyid'])
    imphistoryid = int(stat['imphistoryid'])

    if 0 < total <= succ + ignore + fail:
        if not ResumeRedisCli.delete(key):
            return
        history = ImpHistory.queryByHistoryId(imphistoryid)
        if history:
            history.succ_num = succ
            history.fail_num = fail
            history.end_time = datetime.today()
            history.proc_status = 1
            if history.succ_num == 0:
                history.is_valid = 'F'
            history.save()
        # Update the binding's last resume import time (siteid comes from
        # the key; the task's site_id may be 0)
        AuthService().updateBindImportTimeByCompanyIdAndSiteId(
            companyid, siteid)
        # Report the outcome
        summary = u'<{}>简历总数<{}>,下载数<{}>,成功数<{}>,重复数<{}>,失败数<{}>.'.format(
            SiteConfig.getSiteNameById(siteid), total, grab, succ, ignore,
            fail)
        logger.info(summary)
    elif total == 0:
        if not ResumeRedisCli.delete(key):
            return
        ImpHistory.removeByHistoryId(imphistoryid)
        # Update the binding's last resume import time (siteid comes from
        # the key; the task's site_id may be 0)
        AuthService().updateBindImportTimeByCompanyIdAndSiteId(
            companyid, siteid)
예제 #16
0
 def damaByUrlImage(self, dtype, url):
     """Fetch the captcha image at ``url`` and have Dama2 solve it.

     The image is downloaded through this object's HTTP client and its
     data is handed to Dama2; the ``verify`` value of the answer is
     returned. While this runs a Redis key built from the URL (scheme, host
     and path) and, when an account is bound, from the binding's company
     id, site id and login name holds the URL. The key is deleted
     afterwards whether or not the call succeeded.
     """
     parts = urlparse(url)
     fmturl = parts.scheme + '://' + parts.hostname + parts.path
     if not self.bind:
         damakey = 'dama:{}'.format(fmturl)
     else:
         damakey = 'dama:{}:{}:{}:{}'.format(
             self.bind.company_id, self.bind.site_id, self.bind.login_name,
             fmturl)
     ResumeRedisCli.set(damakey, url)
     try:
         logger.info(u'<{}>进行验证码打码'.format(self.name))
         image = self._httpClient.call(url, authentication=False)
         answer = Dama2().d2File(dtype, image.data)
         logger.info(u'<{}>验证码打码完成'.format(self.name))
         return answer['verify']
     finally:
         ResumeRedisCli.delete(damakey)
예제 #17
0
파일: app.py 프로젝트: cash2one/ZPB
def dispatcher(event):
    def _async_task(tasks):
        # 分发异步任务
        for task in tasks:
            taskname = task.get('name', False)
            if taskname:
                logger.debug(u'dispatcher task named <{}>'.format(taskname))
            args = task.get('args', ())
            kwargs = task.get('kwargs', {})
            kwargs['uid'] = task['id']
            dtc. async (task['func'], *args, **kwargs)

    #
    name = current_process().name
    pid = current_process().pid
    logger.info('{} dispatch tasks at {}'.format(name, pid))
    while not event.is_set():
        try:
            # 任务指令中心
            task_set = DBTask.queryPending() or []
            _async_task(task_set)
            # 手动简历解析
            task_set = ImpLocalFile.queryPending() or []
            _async_task(task_set)
            # 付费简历下载
            task_set = ResSyncDistribute.queryPending() or []
            _async_task(task_set)
            # 邮件搜索定时任务
            task_set = EmailConf.queryPending() or []
            _async_task(task_set)
            # 简历搜索定时任务
            task_set = ResumeSearcher.queryPending() or []
            _async_task(task_set)
            # 关键词百科超链接
            task_set = HyperLink.queryPending() or []
            _async_task(task_set)
            #
            sleep(2)
        except BaseException as e:
            logger.error(e)
            break
    logger.info('{} stopped dispatch tasks'.format(name))
예제 #18
0
파일: mailservice.py 프로젝트: cash2one/ZPB
def pop3(host, port, username, password, usessl):
    """Open a POP3 connection and log in.

    Returns a ``(connection, error_message)`` pair:

    * ``(conn, '')`` after a successful login;
    * ``(None, <text>)`` when the server rejected the login or the host
      name could not be resolved;
    * ``(None, None)`` for any other failure, which the code treats as
      possibly transient (for example a network problem), so the caller may
      simply try again.

    NOTE(review): ``port`` is accepted but never used; the connection is
    always opened on poplib's default port for the chosen class. Confirm
    whether the configured port should be honoured.
    """
    try:
        conn = poplib.POP3_SSL(host) if usessl else poplib.POP3(host)
        if debug_mode:
            conn.set_debuglevel(2)
        conn.user(username)
        conn.pass_(password)
        logger.info(u'[+] 邮箱<{}> 登录成功'.format(username))
        return conn, ''
    except error_proto:
        error_message = u'登录账号或密码错误'
        # The format string previously had no placeholder for the reason,
        # so the reason was never logged.
        logger.error(u'[+] 邮箱<{}> 登录失败,原因:{}'.format(
            username, error_message))
        return None, error_message
    except socket.gaierror:
        error_message = u'非法POP3服务器名'
        logger.error(u'[+] 邮箱<{}> 登录失败,原因:<{}>为{}'.format(
            username, host, error_message))
        return None, error_message
    except BaseException as e:
        # Possibly a network problem; the operation can be retried
        logger.error(u'[+] 邮箱<{}> 登录失败,原因:{}'.format(username, str(e)))
        return None, None
예제 #19
0
 def post(self):
     """Acknowledge exported resumes and answer with a JSON status document.

     Reads ``sessionid`` and ``resumecodes`` from the request. The session
     value is decrypted and must split into exactly two ``:``-separated
     parts, the first being the company id. On success ``status`` is 0;
     otherwise it stays 1 and ``message`` says why. The JSON document is
     written even when an exception propagates.
     """
     ret = {'status': 1, 'message': ''}
     try:
         session = self.get_argument('sessionid', '')
         resumecodes = self.get_argument('resumecodes', '')
         if not session:
             ret['message'] = u'非法的请求!'
             return
         if not resumecodes:
             ret['message'] = u'未提交待确认简历编码!'
             return
         messages = decryptBindPasswd('zpb', session).split(':')
         if len(messages) != 2:
             ret['message'] = u'非法的请求!'
             return
         companyid = messages[0]
         logger.info(u'企业编码<{}>确认已下载简历'.format(companyid))
         ResumeBase.ackExport(companyid, resumecodes.split(','))
         ret['status'] = 0
         ret['message'] = u'简历状态确认成功!'
     finally:
         self.write(json.dumps(ret))
예제 #20
0
def Do201(companyid, siteid, syncid, taskid, **kwargs):
    """Run the handler's ``resume_import`` step and record the result.

    Loads the ``DBTask`` row for ``syncid``, builds the site handler and
    refuses to continue when the account binding is no longer valid or
    failed verification. On success the task is marked done (status 20) and
    a ``DoInfo`` job is queued; otherwise the task gets status 11 when the
    handler asks to try again and status 10 for any other failure. The task
    row is saved at the end, including when a ``BaseError`` was caught.
    Nothing is returned.
    """
    task = DBTask.queryWithId(syncid)
    try:
        handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
        # The account binding has been removed
        if handler.bind.is_valid != 'T':
            DBTask.newSchedule(companyid, siteid, False)
            raise CompanyAccountUnBindError(companyid, siteid, u'账号已解除绑定')
        # The account failed verification
        if handler.bind.check_status == 10:
            raise CompanyAccountInvalidError(companyid, siteid, u'账号登录失败')
        try:
            handler.resume_import(syncid)
        except BaseException as e:
            # Anything raised by the import is wrapped; presumably
            # UnHandleRuntimeError derives from BaseError so the outer
            # handler records it - TODO confirm.
            raise UnHandleRuntimeError(e)
        if HANDLE_STATUS.SUCCESS == handler.status:
            task.succ_num += 1
            task.log_info = handler.message
            task.sync_status = 20
            # Queue the follow-up DoInfo job
            dtc. async ('zpb.service.handleservice.DoInfo',
                        *(companyid, siteid, taskid))
            logger.info(handler.message)
        else:
            task.fail_num += 1
            task.log_info = handler.message
            if HANDLE_STATUS.AGAIN == handler.status:
                task.sync_status = 11
            else:
                task.sync_status = 10
            logger.error(handler.message)
    except BaseError as e:
        task.sync_status = 10
        task.fail_num += 1
        task.log_info = e.message
    task.save()
예제 #21
0
def DownResume(checkstatservice, **kwargs):
    """Download one resume and queue the next step for it.

    ``kwargs`` must carry ``taskid``, ``companyid``, ``siteid``,
    ``username``, ``resumeid`` and ``postdate``; ``force`` is optional.
    Unless ``force`` is set, a resume that ``ResumeBase.isNew`` reports as
    not new is counted as ``ignore`` and skipped. Otherwise the handler
    downloads it and, depending on the handler status, a ``ParseResume``
    job is queued, the download is queued again with ``retry`` incremented,
    or the ``failure`` counter is incremented.

    Whatever happens, ``checkstatservice`` is queued for ``taskid`` at the
    end (the ``finally`` clause also runs on the early return). Nothing is
    returned.
    """
    data = kwargs.copy()
    taskid = data['taskid']
    companyid = data['companyid']
    siteid = data['siteid']
    username = data['username']
    resumeid = data['resumeid']
    postdate = data['postdate']
    # Force a refresh of the resume (used for paid resume downloads)
    force = data.get('force', False)
    # Names used for logging and for the Redis counters
    sitename = SiteConfig.getSiteNameById(siteid)
    importkey = Conf.RESUME_IMPORT_HKEY % taskid
    try:
        try:
            # Check resume freshness (by delivery date)
            if not force:
                if not ResumeBase.isNew(companyid, siteid, resumeid, postdate):
                    # Resumes that have not been updated are ignored
                    logger.debug(u'<{}>简历<{}, {}>重复下载!'.format(
                        sitename, username, resumeid))
                    ResumeRedisCli.hincrby(importkey, 'ignore')
                    return
            try:
                handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
                logger.info(u'正在下载<{}>简历<{}, {}>'.format(
                    sitename, username, resumeid))
                handler.resume_down(data)
                if handler.status == HANDLE_STATUS.SUCCESS:
                    logger.info(handler.message)
                    # NOTE(review): the trailing comma turns this statement
                    # into a one-element tuple; the call still runs.
                    dtc. async ('zpb.service.resumeservice.ParseResume',
                                checkstatservice, **data),
                elif handler.status == HANDLE_STATUS.AGAIN:
                    logger.info(u'<{}>简历<{}, {}>需要重新下载'.format(
                        sitename, username, resumeid))
                    data['retry'] = data.get('retry', 0) + 1
                    dtc. async ('zpb.service.resumeservice.DownResume',
                                checkstatservice, **data),
                else:
                    ResumeRedisCli.hincrby(importkey, 'failure')
                    logger.error(handler.message)
            except BaseError as e:
                # NOTE(review): domain errors are dropped without touching
                # any counter - confirm this is intended.
                pass
        except BaseException as e:
            # Any other exception queues the same download again.
            # NOTE(review): 'retry' is not incremented on this path.
            dtc. async ('zpb.service.resumeservice.DownResume',
                        checkstatservice, **data),
            logger.error(u'<{}>简历<{}, {}>下载异常,原因:{}'.format(
                sitename, username, resumeid, e))
    finally:
        dtc. async (checkstatservice, taskid)
예제 #22
0
def Do101(companyid, siteid, syncid, taskid, **kwargs):
    """Publish or update one job posting on a site and record the result.

    ``kwargs`` must carry ``jobid``. The job is added when its distribution
    record has no ``third_job_code`` yet and modified otherwise. The
    outcome is written to three places: the ``JobSyncDistribute`` record, a
    new ``JobMemo`` and the ``DBTask`` row for ``syncid``. On success a
    ``DoInfo`` job is queued as well. The task row is saved at the end,
    including when a ``BaseError`` was caught. Nothing is returned.
    """
    task = DBTask.queryWithId(syncid)
    try:
        jobid = kwargs.pop('jobid', None)
        if not jobid:
            raise InvalidParamError(companyid, siteid, u'未指定发布职位编号')
        handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
        # The account binding has been removed
        if handler.bind.is_valid != 'T':
            DBTask.newSchedule(companyid, siteid, False)
            raise CompanyAccountUnBindError(companyid, siteid, u'账号已解除绑定')
        # The account failed verification
        if handler.bind.check_status == 10:
            raise CompanyAccountInvalidError(companyid, siteid, u'账号登录失败')
        # step 1: load the distribution record for this job/company/site
        dist = JobSyncDistribute.queryByJobIdAndCompanyIdWithSiteId(
            jobid, companyid, siteid)
        if not dist:
            raise JobNotDistributeError(companyid, siteid, u'未找到职位同步记录')
        try:
            if not dist.third_job_code:
                handler.position_add(jobid)
            else:
                handler.position_modify(jobid, dist.third_job_code,
                                        dist.last_sync_time)
            # step 2: update the distribution record
            if HANDLE_STATUS.SUCCESS == handler.status:
                if hasattr(handler, 'thirdjobcode'):
                    dist.third_job_code = handler.thirdjobcode
                dist.sync_succ_num += 1
                dist.sync_status = 20
                dist.error_message = ''
                dist.last_sync_time = datetime.today()
                # Record the sync time on the job itself
                BaseJob.updateSyncTimeByJobId(jobid)
                # Queue the follow-up DoInfo job
                dtc. async ('zpb.service.handleservice.DoInfo',
                            *(companyid, siteid, taskid))
            else:
                dist.sync_fail_num += 1
                dist.sync_status = 10
                dist.error_message = handler.message
            dist.save()
            # step 3: write a memo on the job
            jmm = JobMemo(jobid)
            if HANDLE_STATUS.SUCCESS == handler.status:
                jmm.memo_content = u'[{}]发布成功'.format(handler.name)
            else:
                jmm.memo_content = u'[{}]发布失败,{}'.format(
                    handler.name, handler.message)
            jmm.save()
            # step 4: update the task row
            if HANDLE_STATUS.SUCCESS == handler.status:
                task.succ_num += 1
                task.log_info = handler.message
                task.sync_status = 20
                logger.info(handler.message)
            else:
                task.fail_num += 1
                task.log_info = handler.message
                if HANDLE_STATUS.AGAIN == handler.status:
                    task.sync_status = 11
                else:
                    task.sync_status = 10
                logger.error(handler.message)
        except BaseException as e:
            # Anything raised in steps 2-4 is wrapped; presumably
            # UnHandleRuntimeError derives from BaseError so the outer
            # handler records it - TODO confirm.
            raise UnHandleRuntimeError(e)
    except BaseError as e:
        task.sync_status = 10
        task.fail_num += 1
        task.log_info = e.message
    task.save()
예제 #23
0
 def post(self):
     """Parse an uploaded resume file and answer with a JSON status document.

     The file posted under the form name ``file`` is sent, base64-encoded,
     to the resume-parsing SOAP service. A usable parse result is assembled
     into a resume and exported into the response. On success ``status`` is
     0 and the response also carries ``count`` and ``res_resume_base``;
     otherwise ``status`` stays 1, ``message`` says why, and the message is
     logged as an error. The JSON document is always written.
     """
     ret = {'status': 1, 'message': ''}
     try:
         # The upload must be present under name=file
         # (`in` replaces the deprecated dict.has_key)
         if 'file' not in self.request.files:
             ret['message'] = u'未上传需要解析的简历文件!'
             return
         try:
             soapCli = SudsClient(Conf.YGYS['soapuri'],
                                  timeout=Conf.YGYS['timeout'])
         except BaseException as e:
             logger.error(e)
             ret['message'] = u'无法连接简历解析WebService服务'
             return
         try:
             sitecode = self.get_argument('sitefromcode', '0')
             maildate = self.get_argument('maildate', '')
             filedata = self.request.files['file'][0]
             filename = filedata['filename']
             logger.info(u'解析简历文件<{}>'.format(filename))
             # File extension; files without one are sent as '.text'
             ext = os.path.splitext(filename)[-1]
             if not ext:
                 ext = '.text'
             res = soapCli.service.TransResumeByJsonStringForFileBase64(
                 Conf.YGYS['username'], Conf.YGYS['password'],
                 base64.b64encode(filedata['body']), ext)
             if res:
                 js = json.loads(res)
                 if js['Type'] == 0:
                     # For Type 0 the Name field is passed on as the message
                     ret['message'] = js['Name']
                 elif not js['Name']:
                     ret['message'] = u'非完整简历'
                 else:
                     js['companyid'] = 0
                     js['jobid'] = 0
                     js['source'] = 0
                     js['apply_job_id'] = 0
                     js['siteid'] = sitecode
                     js['apply_time'] = maildate
                     js['websiteresumeid'] = js['WebSiteResumeID']
                     js['matching'] = 0
                     # On success `message` is handed to the export below
                     (res, message, _new) = AssembelResumeByJson(js)
                     if res:
                         data = ResumeBase.queryAndExportByResumeCode(
                             message)
                         ret['status'] = 0
                         ret['count'] = 1
                         ret['message'] = u'简历文件解析成功!'
                         ret['res_resume_base'] = data
                     else:
                         ret['message'] = message
             else:
                 ret['message'] = u'解析结果空白!'
         except BaseException as e:
             logger.error(e)
             ret['message'] = u'简历解析内部服务错误!'
     finally:
         if ret['status'] == 1:
             logger.error(ret['message'])
         self.write(json.dumps(ret))
예제 #24
0
def CheckEmailImportStat(taskid):
    """Close out a mailbox import once its Redis counters are complete.

    Reads the import hash for ``taskid`` and does nothing unless it exists
    and its ``finish`` flag is non-zero. When ``success + ignore +
    failure`` has reached a positive ``total``, the import history row is
    completed; when ``total`` is zero the history row is removed. In both
    cases the Redis hash must first be deleted successfully; then the
    mailbox's import time and count are updated, a summary is logged and,
    if the hash names a sync task, that ``DBTask`` row is marked done
    (status 20). Nothing is returned.
    """
    key = Conf.RESUME_IMPORT_HKEY % taskid
    stat = ResumeRedisCli.hgetall(key)
    if not stat or not int(stat.get('finish', '0')):
        return

    # Counters
    total = int(stat['total'])
    grab = int(stat['grab'])
    succ = int(stat['success'])
    ignore = int(stat['ignore'])
    fail = int(stat['failure'])
    # Origin of the import ('siteid' is used as the mailbox label in logs)
    siteid = stat['siteid']
    importid = stat['importid']
    imphistoryid = int(stat['imphistoryid'])
    # The sync task id is optional: a missing field or a non-numeric value
    # (such as a stringified None) means there is no task to write back to,
    # instead of raising from int().
    try:
        syncid = int(stat.get('syncid') or 0)
    except (TypeError, ValueError):
        syncid = 0

    if 0 < total <= succ + ignore + fail:
        if not ResumeRedisCli.delete(key):
            return
        imp = ImpHistory.queryByHistoryId(imphistoryid)
        if imp:
            imp.succ_num = succ
            imp.fail_num = fail
            imp.end_time = datetime.today()
            imp.proc_status = 1
            if imp.succ_num == 0:
                imp.is_valid = 'F'
            imp.save()
        # Update the mailbox record
        EmailConf.updateImportTimeAndNumberByImportId(importid, succ)
        msg = u'<{}>邮箱简历总数<{}>,下载数<{}>,成功数<{}>,重复数<{}>,失败数<{}>.'.format(
            siteid, total, grab, succ, ignore, fail)
    elif total == 0:
        if not ResumeRedisCli.delete(key):
            return
        ImpHistory.removeByHistoryId(imphistoryid)
        # Update the mailbox record
        EmailConf.updateImportTimeAndNumberByImportId(importid, 0)
        msg = u'<{}>邮箱简历总数:0'.format(siteid)
    else:
        return

    logger.info(msg)
    # Write the task status back
    if syncid:
        task = DBTask.queryWithId(syncid)
        if task:
            task.succ_num += 1
            task.sync_status = 20
            task.log_info = msg
            task.save()
예제 #25
0
파일: mailservice.py 프로젝트: cash2one/ZPB
def pull_email(emailconf, companyid, taskid, importid, syncid):
    pop, error_message = pop3(emailconf.pop3_host, emailconf.pop3_port,
                              emailconf.email_user, emailconf.email_password,
                              emailconf.is_ssl == 'T')
    if pop:
        try:
            try:
                typ, uidls, octets = pop.uidl()
            except error_proto as e:
                logger.error(u'[-] 获取邮箱<{}>状态失败,原因:{0}'.format(
                    emailconf.email_user, e))
                return
            if len(uidls) > 0:
                msgs = []
                setkey = MAIL_SET_KEY % emailconf.email_user
                # 过滤已下载邮件
                for item in uidls:
                    mid, uidl = item.split()
                    if not MailRedisCli.sismember(setkey, uidl):
                        msgs.append((mid, uidl))
                if len(msgs) > 0:
                    logger.info(u'[+] 邮箱<{}>待下载 {} 封未读邮件...'.format(
                        emailconf.email_user, len(msgs)))
                    imp = ImpHistory.new(emailconf.company_id, 0,
                                         emailconf.import_id, 3)
                    imp.src_memo = emailconf.email_user
                    if imp.save():
                        key = Conf.RESUME_IMPORT_HKEY % taskid
                        ResumeRedisCli.hmset(
                            key,
                            {
                                'total': 0,
                                'grab': 0,
                                'success': 0,
                                'ignore': 0,
                                'failure': 0,
                                'finish': 0,
                                'siteid': emailconf.email_user,  # 邮箱地址
                                'importid': importid,  # 来源id,用於追溯
                                'companyid': companyid,
                                'imphistoryid':
                                imp.history_id,  # 后续存储imp_history_resume时使用
                                'syncid': syncid
                            })
                        for mid, uidl in msgs:
                            download_email(pop, emailconf, mid, uidl, taskid)
                        ResumeRedisCli.hincrby(
                            Conf.RESUME_IMPORT_HKEY % taskid, 'finish')
                        dtc. async (
                            'zpb.service.stateservice.CheckEmailImportStat',
                            taskid)
                        logger.info(u'[+] 邮箱<{}>已下载 {} 封未读邮件!'.format(
                            emailconf.email_user, len(msgs)))
                else:
                    logger.info(u'[+] 邮箱<{}>没有未读邮件!'.format(
                        emailconf.email_user))
            else:
                logger.info(u'[-] 邮箱<{}>没有任何邮件!'.format(emailconf.email_user))
        finally:
            pop.quit()
    elif error_message:
        emailconf.is_valid = 'F'
        emailconf.import_memo = error_message
        emailconf.save()
예제 #26
0
def ParseResume(checkstatservice, **kwargs):
    """Parse one downloaded resume file and record the outcome for its task.

    The file at ``filepath`` is base64-encoded and sent to the resume parsing
    service.  The parsed JSON is enriched with the task fields and handed to
    ``AssembelResumeByJson``.  Each handled outcome increments one of the
    ``success`` / ``ignore`` / ``failure`` counters in the task's Redis hash.
    If anything raises while parsing, the same task is queued again with its
    original arguments.  ``checkstatservice`` is queued with ``taskid`` after
    every attempt, whatever the outcome.

    Args:
        checkstatservice: Task reference passed as the first argument to
            ``dtc.async`` once this attempt is over.
        **kwargs: Task fields.  Required: ``taskid``, ``companyid``,
            ``siteid``, ``jobid``, ``username``, ``resumeid``, ``postdate``
            and ``filepath``.  Optional: ``source`` (default 0), ``force``
            (default False) and ``matching`` (default 0).

    Raises:
        KeyError: If a required field other than ``filepath`` is missing;
            those are read before the retry handler is in place.
    """
    data = kwargs.copy()
    taskid = data['taskid']
    companyid = data['companyid']
    siteid = data['siteid']
    jobid = data['jobid']
    source = data.get('source', 0)
    username = data['username']
    resumeid = data['resumeid']
    postdate = data['postdate']
    # Force a refresh of an existing resume (used for paid resume downloads).
    force = data.get('force', False)
    # Matching score between the resume and the job.
    matching = data.get('matching', 0)
    sitename = SiteConfig.getSiteNameById(siteid)
    importkey = Conf.RESUME_IMPORT_HKEY % taskid
    # `async` is a reserved word from Python 3.7 on, so the attribute cannot
    # be spelled `dtc.async` there; getattr performs the same lookup and
    # parses on every Python version.
    enqueue = getattr(dtc, 'async')
    try:
        try:
            logger.info(u'正在解析<{}>简历<{}>, <{}>'.format(sitename, username,
                                                       resumeid))
            filepath = data['filepath']
            if not os.path.isfile(filepath):
                ResumeRedisCli.hincrby(importkey, 'failure')
                # Report the path that is missing, not the site name.
                logger.error(u'简历解析失败,磁盘文件<{}>不存在'.format(filepath))
                return

            ext = os.path.splitext(filepath)[-1]
            # Close the handle deterministically instead of leaking it.
            with open(filepath, 'rb') as resume_file:
                encoded = base64.b64encode(resume_file.read())
            ret = _doResumeParseByFile(encoded, ext)
            if not ret:
                ResumeRedisCli.hincrby(importkey, 'failure')
                logger.error(u'简历服务器解析简历返回结果异常,<{}><{}, {}>'.format(
                    sitename, username, resumeid))
                return

            js = json.loads(ret)
            if js['Type'] == 0:
                # For Type 0 results, 'Name' is logged as the failure reason.
                ResumeRedisCli.hincrby(importkey, 'failure')
                logger.error(u'<{}>简历<{}, {}>解析失败,原因:{}'.format(
                    sitename, username, resumeid, js['Name']))
                return
            if not js['Name']:
                ResumeRedisCli.hincrby(importkey, 'failure')
                logger.error(
                    u'<{}>简历<{}, {}>解析失败,原因:文件不是一份完整的简历!'.format(
                        sitename, username, resumeid))
                return

            js['companyid'] = companyid
            js['siteid'] = siteid
            # Prefer the id supplied with the task; otherwise fall back to
            # the id found by the parser.  For site 4 the parsed id has its
            # 'J' characters removed and an empty parsed id becomes ''.
            if resumeid:
                js['websiteresumeid'] = resumeid
            elif siteid != 4:
                js['websiteresumeid'] = js['WebSiteResumeID']
            elif js['WebSiteResumeID']:
                js['websiteresumeid'] = js['WebSiteResumeID'].replace('J', '')
            else:
                js['websiteresumeid'] = ''
            js['jobid'] = jobid
            js['source'] = source
            js['force'] = force
            js['matching'] = matching
            js['apply_job_id'] = 0
            js['apply_time'] = postdate

            (res, message, new) = AssembelResumeByJson(js)
            if not res:
                ResumeRedisCli.hincrby(importkey, 'failure')
                logger.error(u'<{}>简历<{}, {}>解析失败,原因:{}'.format(
                    sitename, username, resumeid, message))
                return

            logger.info(u'<{}>简历<{}, {}>解析成功!'.format(
                sitename, username, resumeid))
            if not new:
                # An existing resume was refreshed; nothing new to record.
                ResumeRedisCli.hincrby(importkey, 'ignore')
                return

            imphistoryid = ResumeRedisCli.hget(importkey, 'imphistoryid')
            ResumeRedisCli.hincrby(importkey, 'success')
            # Store the per-resume import history record.
            if ImpHistoryResume.newAndSave(imphistoryid, companyid, message):
                ImpHistory.incSuccessByHistoryId(imphistoryid)
            else:
                logger.error(u'<{}>简历<{}, {}>历史详情保存异常!'.format(
                    sitename, username, resumeid))
        except BaseException as e:
            # Re-queue the same task with its original arguments.
            # NOTE(review): no retry cap is visible here; an error that
            # repeats on every attempt keeps re-queueing unless the task
            # queue itself limits retries -- confirm.
            enqueue('zpb.service.resumeservice.ParseResume',
                    checkstatservice, **data)
            logger.error(u'简历解析服务异常,message:{}'.format(e))
    finally:
        enqueue(checkstatservice, taskid)
예제 #27
0
파일: webservice.py 프로젝트: cash2one/ZPB
def RunWebApp():
    """Start the HTTP server and block in the Tornado IO loop.

    Binds ``http_server`` to ``options.port``, registers ``sig_handler`` for
    SIGINT and SIGTERM, then starts the current IOLoop.
    """
    port = options.port
    logger.info('Http Server listen at port:<{}>'.format(port))
    http_server.listen(port)
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, sig_handler)
    tornado.ioloop.IOLoop.current().start()
예제 #28
0
def ParseLocalResume(companyid, taskid, importid):
    """Parse a locally imported resume (uploaded file or pasted text).

    Loads the ``ImpLocalFile`` row for ``importid``, sends its content to the
    resume parsing service and stores the result via
    ``AssembelResumeByJson``.  The outcome is recorded on the row and on a
    new ``ImpHistory`` record.  Nothing happens when no row is found, and
    the function stops early when the history record cannot be saved.

    Args:
        companyid: Not read here; the company id comes from the row.
        taskid: Not read here.
        importid: Import id used to look up the ``ImpLocalFile`` row.
    """
    row = ImpLocalFile.queryByImportId(importid)
    if row:
        sitename = SiteConfig.getSiteNameById(row.from_site_id)
        logger.info(u'开始解析<{}>的本地简历'.format(sitename))
        row.proc_status = 10
        imp = ImpHistory.new(row.company_id, row.from_site_id, row.import_id,
                             row.input_type)
        if row.input_type == 1:
            imp.src_memo = row.user_file_name
        if not imp.save():
            return
        # Prefix used in log messages; filled in once the input type is known.
        log_msg = u''

        def record_failure(reason):
            # Log the failure and mark the history record as failed.  The
            # format string must be unicode: on Python 2 a byte-string
            # template raises UnicodeEncodeError when given the non-ASCII
            # `log_msg`, which used to replace every real reason with the
            # generic internal-error text from the except branch below.
            logger.error(u'{}失败,{}'.format(log_msg, reason))
            imp.fail_num = 1
            imp.proc_status = 2
            imp.fail_reason = reason

        try:
            if row.input_type == 1:
                log_msg = u'简历文件<{}>解析'.format(row.user_file_name)
                # The file content is passed to the parser base64-encoded.
                # NOTE(review): the earlier comment here said the database
                # stores this column hex-encoded and that it is decoded at
                # this point, but no hex decoding happens -- confirm.
                content = base64.b64encode(row.file_content)
                ext = os.path.splitext(row.user_file_name)[-1]
                ret = _doResumeParseByFile(content, ext)
            else:
                log_msg = u'简历文本解析'
                content = row.input_content
                ret = _doResumeParseByString(content)

            if not ret:
                # The parser returned nothing.
                record_failure(u'解析结果为空')
            else:
                js = json.loads(ret)
                if not (js['Type'] > 0 and js['Name']):
                    # Parsed, but no usable resume content.
                    record_failure(u'简历内容为空')
                else:
                    js['companyid'] = row.company_id
                    js['siteid'] = row.from_site_id
                    # Fall back to a synthetic id when the parser found none.
                    if js['WebSiteResumeID']:
                        js['websiteresumeid'] = js['WebSiteResumeID']
                    else:
                        js['websiteresumeid'] = 'Local{0}'.format(
                            row.import_id)
                    js['jobid'] = ''
                    js['source'] = 0
                    js['apply_job_id'] = row.apply_job_id
                    js['apply_time'] = datetime2str(datetime.today())
                    (res, message, new) = AssembelResumeByJson(js)
                    if not res:
                        record_failure(message)
                    else:
                        row.resume_code = message
                        row.proc_status = 20
                        # Store the per-resume import history record.
                        if ImpHistoryResume.newAndSave(imp.history_id,
                                                       row.company_id,
                                                       message):
                            imp.succ_num = 1
                            imp.proc_status = 1
                            logger.info(u'{}成功'.format(log_msg))
                        else:
                            record_failure(u'数据存储失败')
        except BaseException as e:
            message = u'{}异常,原因:{}'.format(log_msg, e)
            logger.error(message)
            imp.fail_num = 1
            imp.proc_status = 2
            # The stored reason is a generic "internal service error" text;
            # the exception detail goes to the log only.
            imp.fail_reason = u'内部服务错误!'
        # Persist the row and the history record in every case.
        row.save()
        imp.end_time = datetime.today()
        imp.save()