def CheckResumeSearchDownStat(taskid):
    """Close out a paid-resume download task once its counters are complete.

    Reads the progress hash stored under ``Conf.RESUME_IMPORT_HKEY % taskid``.
    Nothing happens unless the hash exists, its ``finish`` field is non-zero
    and the ``ResSyncDistribute`` row identified by the hash's ``importid``
    can be loaded.

    * ``total > 0`` and every resume is accounted for
      (``total <= success + ignore + failure``): the hash is deleted, the
      import-history row is updated, ``ResumeBase.changeStat`` is called for
      the task's resume, the task is written back and a ``DoInfo`` job is
      dispatched through ``dtc``.
    * ``total == 0``: the hash is deleted, the import-history row is removed
      and the task is written back.

    Any other combination leaves everything untouched.

    :param taskid: task identifier used to build the Redis hash key.
    """
    key = Conf.RESUME_IMPORT_HKEY % taskid
    stat = ResumeRedisCli.hgetall(key)
    if not stat:
        return
    if not int(stat.get('finish', '0')):
        return

    # Counters
    total = int(stat['total'])
    grab = int(stat['grab'])
    succ = int(stat['success'])
    ignore = int(stat['ignore'])
    fail = int(stat['failure'])
    # Origin of the import
    siteid = int(stat['siteid'])
    importid = stat['importid']
    companyid = int(stat['companyid'])
    imphistoryid = int(stat['imphistoryid'])

    task = ResSyncDistribute.queryWithId(importid)
    if not task:
        return

    if total > 0 and total <= succ + ignore + fail:
        # Only the caller that actually removes the hash performs the
        # write-back; a falsy delete() result skips everything below.
        if not ResumeRedisCli.delete(key):
            return
        imp = ImpHistory.queryByHistoryId(imphistoryid)
        if imp:
            imp.succ_num = succ
            imp.fail_num = fail
            imp.end_time = datetime.today()
            imp.proc_status = 1
            if imp.succ_num == 0:
                imp.is_valid = 'F'
            imp.save()
        # Update the resume state
        ResumeBase.changeStat(task.resume_code)
        msg = u'<{}>下载付费简历总数<{}>,下载数<{}>,成功数<{}>,重复数<{}>,失败数<{}>.'.format(
            SiteConfig.getSiteNameById(siteid), total, grab, succ, ignore,
            fail)
        _write_back_download_task(task, msg)
        # `async` is a reserved word from Python 3.7 on, so the attribute is
        # looked up by name; the call itself is unchanged.
        getattr(dtc, 'async')('zpb.service.handleservice.DoInfo',
                              companyid, siteid, taskid)
    elif total == 0:
        if not ResumeRedisCli.delete(key):
            return
        ImpHistory.removeByHistoryId(imphistoryid)
        msg = u'<{}>下载付费简历总数:0'.format(
            SiteConfig.getSiteNameById(siteid))
        _write_back_download_task(task, msg)


def _write_back_download_task(task, msg):
    """Write the completion fields back to *task*, save it and log *msg*."""
    task.sync_status = 20
    task.error_message = ''
    task.process_time = datetime.today()
    task.save()
    logger.info(msg)
def CheckResumeSearchStat(taskid):
    """Close out a resume-search import once its counters are complete.

    Reads the progress hash stored under ``Conf.RESUME_IMPORT_HKEY % taskid``
    and does nothing unless the hash exists and its ``finish`` field is
    non-zero.

    * ``total > 0`` and ``total <= success + ignore + failure``: the hash is
      deleted, the import-history row is updated, the bind's last import time
      is refreshed and a summary line is logged.
    * ``total == 0``: the hash is deleted, the import-history row is removed
      and the bind's last import time is refreshed.

    :param taskid: task identifier used to build the Redis hash key.
    """
    hkey = Conf.RESUME_IMPORT_HKEY % taskid
    progress = ResumeRedisCli.hgetall(hkey)
    if not progress:
        return
    if not int(progress.get('finish', '0')):
        return

    # Counters
    total = int(progress['total'])
    grabbed = int(progress['grab'])
    succeeded = int(progress['success'])
    ignored = int(progress['ignore'])
    failed = int(progress['failure'])
    # Origin of the import
    site_id = int(progress['siteid'])
    import_id = progress['importid']  # fetched (must be present), unused below
    company_id = int(progress['companyid'])
    history_id = int(progress['imphistoryid'])

    handled = succeeded + ignored + failed
    if total > 0 and total <= handled:
        if ResumeRedisCli.delete(hkey):
            history = ImpHistory.queryByHistoryId(history_id)
            if history:
                history.succ_num = succeeded
                history.fail_num = failed
                history.end_time = datetime.today()
                history.proc_status = 1
                if history.succ_num == 0:
                    history.is_valid = 'F'
                history.save()
            # Refresh the bind's last import time (site id comes from the
            # hash; the task's own site_id may be 0).
            AuthService().updateBindImportTimeByCompanyIdAndSiteId(
                company_id, site_id)
            # Report the outcome
            summary = u'<{}>简历总数<{}>,下载数<{}>,成功数<{}>,重复数<{}>,失败数<{}>.'.format(
                SiteConfig.getSiteNameById(site_id), total, grabbed,
                succeeded, ignored, failed)
            logger.info(summary)
    elif total == 0:
        if ResumeRedisCli.delete(hkey):
            ImpHistory.removeByHistoryId(history_id)
            # Refresh the bind's last import time (site id comes from the
            # hash; the task's own site_id may be 0).
            AuthService().updateBindImportTimeByCompanyIdAndSiteId(
                company_id, site_id)
def resume_import(self, importid):
    """Create the import-history row and the Redis progress hash for a run.

    A new ``ImpHistory`` record is created and saved for this bind, then a
    hash keyed by ``Conf.RESUME_IMPORT_HKEY % self.taskid`` is written with
    all counters at zero plus the origin fields, and given a 24-hour expiry.

    :param importid: id of the import source, stored for traceability.
    """
    bind = self.bind

    # Initialise the history record
    history = ImpHistory.new(bind.company_id, bind.site_id, importid)
    history.save()

    # Initialise the cached progress record
    cache_key = Conf.RESUME_IMPORT_HKEY % self.taskid
    fields = {
        'total': 0,
        'grab': 0,
        'success': 0,
        'ignore': 0,
        'failure': 0,
        'finish': 0,
    }
    fields['siteid'] = bind.site_id  # source recruiting site
    fields['importid'] = importid  # source id, kept for traceability
    fields['companyid'] = bind.company_id
    # Needed later when imp_history_resume rows are stored
    fields['imphistoryid'] = history.history_id
    ResumeRedisCli.hmset(cache_key, fields)

    # Expire the cached record after one day
    one_day_seconds = 60 * 60 * 24
    ResumeRedisCli.expire(cache_key, one_day_seconds)
def CheckEmailImportStat(taskid):
    """Close out a mailbox import once its counters are complete.

    Reads the progress hash stored under ``Conf.RESUME_IMPORT_HKEY % taskid``
    and does nothing unless the hash exists and its ``finish`` field is
    non-zero.

    * ``total > 0`` and ``total <= success + ignore + failure``: the hash is
      deleted, the import-history row is updated, the mailbox configuration
      is updated with the success count, a summary is logged and the sync
      task (if any) is written back.
    * ``total == 0``: the hash is deleted, the import-history row is removed,
      the mailbox configuration is updated with a count of 0, a summary is
      logged and the sync task (if any) is written back.

    A missing or non-numeric ``syncid`` field is treated as "no sync task";
    previously ``int(None)`` raised ``TypeError`` in that case.

    :param taskid: task identifier used to build the Redis hash key.
    """
    key = Conf.RESUME_IMPORT_HKEY % taskid
    stat = ResumeRedisCli.hgetall(key)
    if not stat:
        return
    if not int(stat.get('finish', '0')):
        return

    # Counters
    total = int(stat['total'])
    grab = int(stat['grab'])
    succ = int(stat['success'])
    ignore = int(stat['ignore'])
    fail = int(stat['failure'])
    # Origin of the import; 'siteid' is used verbatim in the log text
    siteid = stat['siteid']
    importid = stat['importid']
    companyid = stat['companyid']  # fetched (must be present), unused below
    imphistoryid = int(stat['imphistoryid'])
    # 'syncid' is optional: fall back to 0 so the `if syncid` guard in the
    # write-back helper can do its job instead of crashing here.
    try:
        syncid = int(stat.get('syncid'))
    except (TypeError, ValueError):
        syncid = 0

    if total > 0 and total <= succ + ignore + fail:
        if not ResumeRedisCli.delete(key):
            return
        imp = ImpHistory.queryByHistoryId(imphistoryid)
        if imp:
            imp.succ_num = succ
            imp.fail_num = fail
            imp.end_time = datetime.today()
            imp.proc_status = 1
            if imp.succ_num == 0:
                imp.is_valid = 'F'
            imp.save()
        # Update the mailbox record
        EmailConf.updateImportTimeAndNumberByImportId(importid, succ)
        msg = u'<{}>邮箱简历总数<{}>,下载数<{}>,成功数<{}>,重复数<{}>,失败数<{}>.'.format(
            siteid, total, grab, succ, ignore, fail)
        logger.info(msg)
        _write_back_email_sync_task(syncid, msg)
    elif total == 0:
        if not ResumeRedisCli.delete(key):
            return
        ImpHistory.removeByHistoryId(imphistoryid)
        # Update the mailbox record
        EmailConf.updateImportTimeAndNumberByImportId(importid, 0)
        msg = u'<{}>邮箱简历总数:0'.format(siteid)
        logger.info(msg)
        _write_back_email_sync_task(syncid, msg)


def _write_back_email_sync_task(syncid, msg):
    """Record *msg* on the ``DBTask`` row *syncid*, if there is one."""
    if not syncid:
        return
    task = DBTask.queryWithId(syncid)
    if task:
        task.succ_num += 1
        task.sync_status = 20
        task.log_info = msg
        task.save()
def ParseResume(checkstatservice, **kwargs):
    """Parse one downloaded resume file and record the outcome in Redis.

    The file at ``filepath`` is base64-encoded and handed to
    ``_doResumeParseByFile``; the JSON result is enriched with the task
    fields and passed to ``AssembelResumeByJson``. Exactly one of the
    ``success`` / ``ignore`` / ``failure`` counters of the hash
    ``Conf.RESUME_IMPORT_HKEY % taskid`` is incremented on every non-raising
    path. Whatever happens, ``checkstatservice`` is dispatched with
    ``taskid`` at the end.

    :param checkstatservice: service name dispatched through ``dtc`` after
        processing.
    :param kwargs: required keys ``taskid``, ``companyid``, ``siteid``,
        ``jobid``, ``username``, ``resumeid``, ``postdate``, ``filepath``;
        optional ``source`` (default 0), ``force`` (default False, forces a
        resume refresh, used for paid downloads) and ``matching``
        (default 0, resume/job matching score).
    :raises KeyError: if a required key other than ``filepath`` is missing
        (those are read before the ``try`` block).
    """
    data = kwargs.copy()
    taskid = data['taskid']
    companyid = data['companyid']
    siteid = data['siteid']
    jobid = data['jobid']
    source = data.get('source', 0)
    username = data['username']
    resumeid = data['resumeid']
    postdate = data['postdate']
    # Force a resume refresh (used for paid resume downloads)
    force = data.get('force', False)
    # Matching score between resume and job
    matching = data.get('matching', 0)

    sitename = SiteConfig.getSiteNameById(siteid)
    importkey = Conf.RESUME_IMPORT_HKEY % taskid
    # `async` is a reserved word from Python 3.7 on, so the attribute is
    # looked up by name; the calls themselves are unchanged.
    dispatch = getattr(dtc, 'async')
    try:
        logger.info(u'正在解析<{}>简历<{}>, <{}>'.format(sitename, username, resumeid))
        filepath = data['filepath']
        if not os.path.isfile(filepath):
            ResumeRedisCli.hincrby(importkey, 'failure')
            # The placeholder names the missing file, so format the path
            # (the original passed sitename/username/resumeid here).
            message = u'简历解析失败,磁盘文件<{}>不存在'.format(filepath)
            logger.error(message)
            return

        ext = os.path.splitext(filepath)[-1]
        # Close the handle deterministically instead of leaking it.
        with open(filepath, 'rb') as fh:
            encoded = base64.b64encode(fh.read())
        ret = _doResumeParseByFile(encoded, ext)
        if not ret:
            ResumeRedisCli.hincrby(importkey, 'failure')
            message = u'简历服务器解析简历返回结果异常,<{}><{}, {}>'.format(
                sitename, username, resumeid)
            logger.error(message)
            return

        js = json.loads(ret)
        if js['Type'] == 0:
            ResumeRedisCli.hincrby(importkey, 'failure')
            logger.error(u'<{}>简历<{}, {}>解析失败,原因:{}'.format(
                sitename, username, resumeid, js['Name']))
            return
        if not js['Name']:
            ResumeRedisCli.hincrby(importkey, 'failure')
            logger.error(
                u'<{}>简历<{}, {}>解析失败,原因:文件不是一份完整的简历!'.format(
                    sitename, username, resumeid))
            return

        js['companyid'] = companyid
        js['siteid'] = siteid
        js['websiteresumeid'] = _pick_website_resume_id(js, siteid, resumeid)
        js['jobid'] = jobid
        js['source'] = source
        js['force'] = force
        js['matching'] = matching
        js['apply_job_id'] = 0
        js['apply_time'] = postdate

        res, message, new = AssembelResumeByJson(js)
        if not res:
            ResumeRedisCli.hincrby(importkey, 'failure')
            logger.error(u'<{}>简历<{}, {}>解析失败,原因:{}'.format(
                sitename, username, resumeid, message))
            return

        logger.info(u'<{}>简历<{}, {}>解析成功!'.format(
            sitename, username, resumeid))
        imphistoryid = ResumeRedisCli.hget(importkey, 'imphistoryid')
        if not new:
            # Existing resume was refreshed; nothing new to record.
            ResumeRedisCli.hincrby(importkey, 'ignore')
            return

        ResumeRedisCli.hincrby(importkey, 'success')
        # Store the per-resume history detail row
        if ImpHistoryResume.newAndSave(imphistoryid, companyid, message):
            ImpHistory.incSuccessByHistoryId(imphistoryid)
        else:
            logger.error(u'<{}>简历<{}, {}>历史详情保存异常!'.format(
                sitename, username, resumeid))
    except BaseException as e:
        # Re-dispatch the same job with the same arguments.
        # NOTE(review): this also traps KeyboardInterrupt/SystemExit, and a
        # deterministic failure (e.g. a missing 'filepath' key) is
        # re-dispatched every time — confirm this is intended.
        dispatch('zpb.service.resumeservice.ParseResume', checkstatservice,
                 **data)
        logger.error(u'简历解析服务异常,message:{}'.format(e))
    finally:
        dispatch(checkstatservice, taskid)


def _pick_website_resume_id(js, siteid, resumeid):
    """Choose the site-side resume id: the caller's, else the parsed one.

    For site 4 the parsed id has every 'J' removed and an empty parsed id
    becomes ''.
    """
    if resumeid:
        return resumeid
    parsed_id = js['WebSiteResumeID']
    if siteid == 4:
        return parsed_id.replace('J', '') if parsed_id else ''
    return parsed_id
def ParseLocalResume(companyid, taskid, importid):
    """Parse a locally uploaded resume (file or pasted text) and store it.

    Loads the ``ImpLocalFile`` row for *importid*, creates an ``ImpHistory``
    record, runs the matching parser (``_doResumeParseByFile`` when
    ``input_type == 1``, otherwise ``_doResumeParseByString``) and passes the
    result to ``AssembelResumeByJson``. The outcome is written to the history
    record; both the local-file row and the history record are saved at the
    end. Returns early, without parsing, when the row does not exist or the
    initial history save fails.

    :param companyid: not used by this function (the row's ``company_id`` is
        used instead).
    :param taskid: not used by this function.
    :param importid: id of the ``ImpLocalFile`` row to process.
    """
    row = ImpLocalFile.queryByImportId(importid)
    if not row:
        return

    sitename = SiteConfig.getSiteNameById(row.from_site_id)
    logger.info(u'开始解析<{}>的本地简历'.format(sitename))
    row.proc_status = 10
    imp = ImpHistory.new(row.company_id, row.from_site_id, row.import_id,
                         row.input_type)
    if row.input_type == 1:
        imp.src_memo = row.user_file_name
    if not imp.save():
        return

    # Prefix used in every log / failure message below
    log_msg = u''
    try:
        if row.input_type == 1:
            log_msg = u'简历文件<{}>解析'.format(row.user_file_name)
            # The original note says the DB column is hex-encoded and gets
            # decoded here; the code base64-encodes the stored content for
            # the parser. NOTE(review): confirm which is intended.
            content = base64.b64encode(row.file_content)
            ext = os.path.splitext(row.user_file_name)[-1]
            ret = _doResumeParseByFile(content, ext)
        else:
            log_msg = u'简历文本解析'
            content = row.input_content
            ret = _doResumeParseByString(content)

        if not ret:
            _record_local_import_failure(imp, log_msg, u'解析结果为空')
        else:
            js = json.loads(ret)
            if not (js['Type'] > 0 and js['Name']):
                _record_local_import_failure(imp, log_msg, u'简历内容为空')
            else:
                js['companyid'] = row.company_id
                js['siteid'] = row.from_site_id
                if js['WebSiteResumeID']:
                    js['websiteresumeid'] = js['WebSiteResumeID']
                else:
                    js['websiteresumeid'] = 'Local{0}'.format(row.import_id)
                js['jobid'] = ''
                js['source'] = 0
                js['apply_job_id'] = row.apply_job_id
                js['apply_time'] = datetime2str(datetime.today())
                res, message, _new = AssembelResumeByJson(js)
                if not res:
                    _record_local_import_failure(imp, log_msg, message)
                else:
                    row.resume_code = message
                    row.proc_status = 20
                    # Store the per-resume history detail row
                    if ImpHistoryResume.newAndSave(imp.history_id,
                                                   row.company_id, message):
                        imp.succ_num = 1
                        imp.proc_status = 1
                        message = u'{}成功'.format(log_msg)
                        logger.info(message)
                    else:
                        _record_local_import_failure(imp, log_msg,
                                                     u'数据存储失败')
    except BaseException as e:
        message = u'{}异常,原因:{}'.format(log_msg, e)
        logger.error(message)
        imp.fail_num = 1
        imp.proc_status = 2
        imp.fail_reason = u'内部服务错误!'

    # Persist the results
    row.save()
    imp.end_time = datetime.today()
    imp.save()


def _record_local_import_failure(imp, log_msg, reason):
    """Log a failed local import and mark *imp* as failed with *reason*.

    The template is a unicode literal: on Python 2 a byte-string template
    formatted with non-ASCII unicode arguments raises UnicodeEncodeError.
    """
    logger.error(u'{}失败,{}'.format(log_msg, reason))
    imp.fail_num = 1
    imp.proc_status = 2
    imp.fail_reason = reason
def pull_email(emailconf, companyid, taskid, importid, syncid):
    """Download the not-yet-seen messages of one mailbox over POP3.

    Connects with ``pop3(...)``. When the connection fails and an error
    message is returned, the mailbox configuration is marked invalid and
    saved. Otherwise the UIDL listing is filtered against the Redis set
    ``MAIL_SET_KEY % emailconf.email_user``; if anything remains, an
    ``ImpHistory`` record and the progress hash
    ``Conf.RESUME_IMPORT_HKEY % taskid`` are created, each message is passed
    to ``download_email``, the hash's ``finish`` field is incremented and
    ``CheckEmailImportStat`` is dispatched. The POP3 session is always closed
    with ``quit()``.

    :param emailconf: mailbox configuration (host, port, user, password,
        ``is_ssl``, ``company_id``, ``import_id``).
    :param companyid: stored in the progress hash.
    :param taskid: task identifier used to build the progress hash key.
    :param importid: stored in the progress hash for traceability.
    :param syncid: stored in the progress hash.
    """
    pop, error_message = pop3(emailconf.pop3_host, emailconf.pop3_port,
                              emailconf.email_user, emailconf.email_password,
                              emailconf.is_ssl == 'T')
    if not pop:
        if error_message:
            emailconf.is_valid = 'F'
            emailconf.import_memo = error_message
            emailconf.save()
        return

    try:
        try:
            _resp, uidls, _octets = pop.uidl()
        except error_proto as e:
            # Both fields use automatic numbering: mixing '{}' with '{0}'
            # makes str.format raise ValueError instead of logging.
            logger.error(u'[-] 获取邮箱<{}>状态失败,原因:{}'.format(
                emailconf.email_user, e))
            return

        if not uidls:
            logger.info(u'[-] 邮箱<{}>没有任何邮件!'.format(emailconf.email_user))
            return

        # Skip messages whose UIDL is already in the per-mailbox set
        setkey = MAIL_SET_KEY % emailconf.email_user
        msgs = [
            (mid, uidl)
            for mid, uidl in (item.split() for item in uidls)
            if not MailRedisCli.sismember(setkey, uidl)
        ]
        if not msgs:
            logger.info(u'[+] 邮箱<{}>没有未读邮件!'.format(
                emailconf.email_user))
            return

        logger.info(u'[+] 邮箱<{}>待下载 {} 封未读邮件...'.format(
            emailconf.email_user, len(msgs)))
        imp = ImpHistory.new(emailconf.company_id, 0, emailconf.import_id, 3)
        imp.src_memo = emailconf.email_user
        if not imp.save():
            return

        key = Conf.RESUME_IMPORT_HKEY % taskid
        # NOTE(review): unlike resume_import, no expiry is set on this hash —
        # confirm that is intended.
        ResumeRedisCli.hmset(
            key,
            {
                'total': 0,
                'grab': 0,
                'success': 0,
                'ignore': 0,
                'failure': 0,
                'finish': 0,
                'siteid': emailconf.email_user,  # mailbox address
                'importid': importid,  # source id, kept for traceability
                'companyid': companyid,
                # Needed later when imp_history_resume rows are stored
                'imphistoryid': imp.history_id,
                'syncid': syncid
            })
        for mid, uidl in msgs:
            download_email(pop, emailconf, mid, uidl, taskid)
        ResumeRedisCli.hincrby(key, 'finish')
        # `async` is a reserved word from Python 3.7 on, so the attribute is
        # looked up by name; the call itself is unchanged.
        getattr(dtc, 'async')(
            'zpb.service.stateservice.CheckEmailImportStat', taskid)
        logger.info(u'[+] 邮箱<{}>已下载 {} 封未读邮件!'.format(
            emailconf.email_user, len(msgs)))
    finally:
        pop.quit()