示例#1
0
def CheckResumeSearchDownStat(taskid):
    """Finalize a paid-resume download task once every resume is accounted for.

    Reads the per-task counter hash stored under ``Conf.RESUME_IMPORT_HKEY``.
    Nothing happens unless the hash exists, its ``finish`` flag is non-zero
    and the task referenced by ``importid`` can be loaded.

    * ``total > 0`` and ``success + ignore + failure`` has reached ``total``:
      the hash is deleted, the import-history row is updated with the final
      counters, the resume status is changed, the task is marked with
      ``sync_status = 20`` and a ``DoInfo`` job is dispatched.
    * ``total == 0``: the hash is deleted, the import-history row is removed
      and the task is marked with ``sync_status = 20``.

    In both cases the work is only done when ``ResumeRedisCli.delete(key)``
    returns a truthy value (presumably so that only the caller that really
    removed the hash performs the finalization -- confirm against the Redis
    client in use).

    :param taskid: task identifier used to build the Redis hash key.
    """
    key = Conf.RESUME_IMPORT_HKEY % taskid
    stat = ResumeRedisCli.hgetall(key)
    if not stat:
        return
    # The producer sets ``finish`` once it has queued everything.
    if not int(stat.get('finish', '0')):
        return

    # Counters
    total = int(stat['total'])
    grab = int(stat['grab'])
    succ = int(stat['success'])
    ignore = int(stat['ignore'])
    fail = int(stat['failure'])
    # Origin information
    siteid = int(stat['siteid'])
    importid = stat['importid']
    companyid = int(stat['companyid'])
    imphistoryid = int(stat['imphistoryid'])

    task = ResSyncDistribute.queryWithId(importid)
    if not task:
        return

    if total > 0 and total <= succ + ignore + fail:
        if not ResumeRedisCli.delete(key):
            return
        imp = ImpHistory.queryByHistoryId(imphistoryid)
        if imp:
            imp.succ_num = succ
            imp.fail_num = fail
            imp.end_time = datetime.today()
            imp.proc_status = 1
            # A history entry without a single success is flagged invalid.
            if imp.succ_num == 0:
                imp.is_valid = 'F'
            imp.save()
        # Change the resume status
        ResumeBase.changeStat(task.resume_code)
        # Write the result back to the task
        msg = u'<{}>下载付费简历总数<{}>,下载数<{}>,成功数<{}>,重复数<{}>,失败数<{}>.'.format(
            SiteConfig.getSiteNameById(siteid), total,
            grab, succ, ignore, fail)
        task.sync_status = 20
        task.error_message = ''
        task.process_time = datetime.today()
        task.save()
        logger.info(msg)
        # ``async`` is a reserved word since Python 3.7, so ``dtc.async``
        # no longer parses; getattr() is equivalent on every version.
        getattr(dtc, 'async')('zpb.service.handleservice.DoInfo',
                              companyid, siteid, taskid)
    elif total == 0:
        if not ResumeRedisCli.delete(key):
            return
        ImpHistory.removeByHistoryId(imphistoryid)
        msg = u'<{}>下载付费简历总数:0'.format(
            SiteConfig.getSiteNameById(siteid))
        task.sync_status = 20
        task.error_message = ''
        task.process_time = datetime.today()
        task.save()
        logger.info(msg)
示例#2
0
def CheckResumeSearchStat(taskid):
    """Close out a resume-search import when all resumes have been handled.

    Loads the counter hash for ``taskid`` from Redis and returns immediately
    when the hash is missing or its ``finish`` flag is still zero.

    * ``total > 0`` and the processed count (success + ignore + failure) has
      reached ``total``: delete the hash, store the final counters on the
      import-history row, refresh the bind's last-import time and log a
      summary line.
    * ``total == 0``: delete the hash, remove the import-history row and
      refresh the bind's last-import time.

    Each branch only runs its follow-up work when the Redis delete reports
    a truthy result.

    :param taskid: task identifier used to build the Redis hash key.
    """
    cache_key = Conf.RESUME_IMPORT_HKEY % taskid
    counters = ResumeRedisCli.hgetall(cache_key)
    if not counters:
        return
    if not int(counters.get('finish', '0')):
        return

    # Counters
    total = int(counters['total'])
    grabbed = int(counters['grab'])
    succeeded = int(counters['success'])
    ignored = int(counters['ignore'])
    failed = int(counters['failure'])
    # Origin information
    site_id = int(counters['siteid'])
    import_id = counters['importid']  # read like the other fields; unused below
    company_id = int(counters['companyid'])
    history_id = int(counters['imphistoryid'])

    processed = succeeded + ignored + failed
    if total > 0 and total <= processed:
        if not ResumeRedisCli.delete(cache_key):
            return
        history = ImpHistory.queryByHistoryId(history_id)
        if history:
            history.succ_num = succeeded
            history.fail_num = failed
            history.end_time = datetime.today()
            history.proc_status = 1
            if history.succ_num == 0:
                history.is_valid = 'F'
            history.save()
        # Refresh the bind's last import time. The site id comes from the
        # cached hash because the task's own site_id may be 0.
        AuthService().updateBindImportTimeByCompanyIdAndSiteId(
            company_id, site_id)
        # Log the summary
        summary = u'<{}>简历总数<{}>,下载数<{}>,成功数<{}>,重复数<{}>,失败数<{}>.'.format(
            SiteConfig.getSiteNameById(site_id), total, grabbed,
            succeeded, ignored, failed)
        logger.info(summary)
    elif total == 0:
        if not ResumeRedisCli.delete(cache_key):
            return
        ImpHistory.removeByHistoryId(history_id)
        # Refresh the bind's last import time. The site id comes from the
        # cached hash because the task's own site_id may be 0.
        AuthService().updateBindImportTimeByCompanyIdAndSiteId(
            company_id, site_id)
示例#3
0
 def resume_import(self, importid):
     """Create the history row and the Redis counter hash for an import.

     A new ``ImpHistory`` record is created from the bind's company and
     site ids and saved. A hash keyed by ``Conf.RESUME_IMPORT_HKEY`` for
     ``self.taskid`` is then written with all counters at zero plus the
     origin fields, and given a 24 hour expiry.

     :param importid: id of the import source, kept for traceability.
     """
     # Initialise the history record
     history = ImpHistory.new(self.bind.company_id, self.bind.site_id,
                              importid)
     history.save()

     # Initialise the cached counters
     cache_key = Conf.RESUME_IMPORT_HKEY % self.taskid
     initial_state = {
         'total': 0,
         'grab': 0,
         'success': 0,
         'ignore': 0,
         'failure': 0,
         'finish': 0,
         'siteid': self.bind.site_id,  # originating recruiting site
         'importid': importid,  # source id, kept for traceability
         'companyid': self.bind.company_id,
         # needed later when imp_history_resume rows are stored
         'imphistoryid': history.history_id,
     }
     ResumeRedisCli.hmset(cache_key, initial_state)

     # Let the cached data expire after one day
     one_day_seconds = 60 * 60 * 24
     ResumeRedisCli.expire(cache_key, one_day_seconds)
示例#4
0
def CheckEmailImportStat(taskid):
    """Finalize a mailbox resume import once every mail has been processed.

    Reads the counter hash for ``taskid`` from Redis and returns when it is
    missing or its ``finish`` flag is still zero.

    * ``total > 0`` and success + ignore + failure has reached ``total``:
      delete the hash, store the final counters on the import-history row
      and update the mailbox configuration with the number of successes.
    * ``total == 0``: delete the hash, remove the import-history row and
      update the mailbox configuration with a count of 0.

    In both cases a summary line is logged and, when a sync id is present,
    the matching ``DBTask`` gets ``succ_num`` incremented, ``sync_status``
    set to 20 and the summary stored in ``log_info``. Nothing is done when
    the Redis delete does not report a truthy result.

    :param taskid: task identifier used to build the Redis hash key.
    """
    key = Conf.RESUME_IMPORT_HKEY % taskid
    stat = ResumeRedisCli.hgetall(key)
    if not stat:
        return
    if not int(stat.get('finish', '0')):
        return

    # Counters
    total = int(stat['total'])
    grab = int(stat['grab'])
    succ = int(stat['success'])
    ignore = int(stat['ignore'])
    fail = int(stat['failure'])
    # Origin information (``siteid`` is only used for the log text here)
    siteid = stat['siteid']
    importid = stat['importid']
    companyid = stat['companyid']
    imphistoryid = int(stat['imphistoryid'])
    # ``syncid`` is optional: it may be missing from the hash or hold a
    # non-numeric placeholder such as a stringified None. int() would raise
    # on either and abort the finalization, so treat both as "no sync task".
    try:
        syncid = int(stat.get('syncid'))
    except (TypeError, ValueError):
        syncid = 0

    if total > 0 and total <= succ + ignore + fail:
        if not ResumeRedisCli.delete(key):
            return
        imp = ImpHistory.queryByHistoryId(imphistoryid)
        if imp:
            imp.succ_num = succ
            imp.fail_num = fail
            imp.end_time = datetime.today()
            imp.proc_status = 1
            if imp.succ_num == 0:
                imp.is_valid = 'F'
            imp.save()
        # Update the mailbox information
        EmailConf.updateImportTimeAndNumberByImportId(importid, succ)
        msg = u'<{}>邮箱简历总数<{}>,下载数<{}>,成功数<{}>,重复数<{}>,失败数<{}>.'.format(
            siteid, total, grab, succ, ignore, fail)
    elif total == 0:
        if not ResumeRedisCli.delete(key):
            return
        ImpHistory.removeByHistoryId(imphistoryid)
        # Update the mailbox information
        EmailConf.updateImportTimeAndNumberByImportId(importid, 0)
        msg = u'<{}>邮箱简历总数:0'.format(siteid)
    else:
        # Still waiting for outstanding resumes.
        return

    logger.info(msg)
    # Write the result back to the sync task, if there is one
    if syncid:
        task = DBTask.queryWithId(syncid)
        if task:
            task.succ_num += 1
            task.sync_status = 20
            task.log_info = msg
            task.save()
示例#5
0
def ParseResume(checkstatservice, **kwargs):
    """Parse one downloaded resume file and update the task's counters.

    The file at ``filepath`` is read, base64-encoded and handed to
    ``_doResumeParseByFile``. The parsed JSON is enriched with the task
    fields and passed to ``AssembelResumeByJson``. Exactly one of the
    ``success`` / ``ignore`` / ``failure`` counters in the task's Redis hash
    is incremented for every outcome handled here.

    If anything raises, the same job is dispatched again with the same
    arguments and the error is logged. Whatever happens, the
    ``checkstatservice`` job is dispatched for the task at the end.

    :param checkstatservice: dotted name of the job that checks whether the
        task is complete; dispatched with ``taskid``.
    :param kwargs: required keys ``taskid``, ``companyid``, ``siteid``,
        ``jobid``, ``username``, ``resumeid``, ``postdate``, ``filepath``;
        optional keys ``source`` (default 0), ``force`` (default False) and
        ``matching`` (default 0).
    """
    data = kwargs.copy()
    taskid = data['taskid']
    companyid = data['companyid']
    siteid = data['siteid']
    jobid = data['jobid']
    source = data.get('source', 0)
    username = data['username']
    resumeid = data['resumeid']
    postdate = data['postdate']
    # Force a refresh of the resume (used for paid resume downloads)
    force = data.get('force', False)
    # How well the resume matches the job
    matching = data.get('matching', 0)

    sitename = SiteConfig.getSiteNameById(siteid)
    importkey = Conf.RESUME_IMPORT_HKEY % taskid
    # ``async`` is a reserved word since Python 3.7, so the attribute is
    # looked up by name instead of written as ``dtc.async``.
    dispatch = getattr(dtc, 'async')
    try:
        try:
            logger.info(u'正在解析<{}>简历<{}>, <{}>'.format(sitename, username,
                                                       resumeid))
            filepath = data['filepath']
            if not os.path.isfile(filepath):
                ResumeRedisCli.hincrby(importkey, 'failure')
                # Report the missing path itself; the template has a single
                # placeholder for it.
                logger.error(u'简历解析失败,磁盘文件<{}>不存在'.format(filepath))
                return

            ext = os.path.splitext(filepath)[-1]
            # Close the file deterministically instead of leaking the handle.
            with open(filepath, 'rb') as fp:
                content = base64.b64encode(fp.read())
            ret = _doResumeParseByFile(content, ext)
            if not ret:
                ResumeRedisCli.hincrby(importkey, 'failure')
                logger.error(u'简历服务器解析简历返回结果异常,<{}><{}, {}>'.format(
                    sitename, username, resumeid))
                return

            js = json.loads(ret)
            if js['Type'] == 0:
                ResumeRedisCli.hincrby(importkey, 'failure')
                logger.error(u'<{}>简历<{}, {}>解析失败,原因:{}'.format(
                    sitename, username, resumeid, js['Name']))
                return
            if not js['Name']:
                ResumeRedisCli.hincrby(importkey, 'failure')
                logger.error(
                    u'<{}>简历<{}, {}>解析失败,原因:文件不是一份完整的简历!'.format(
                        sitename, username, resumeid))
                return

            js['companyid'] = companyid
            js['siteid'] = siteid
            if siteid == 4:
                # For site 4 a parsed id has its 'J' characters removed.
                if resumeid:
                    js['websiteresumeid'] = resumeid
                elif js['WebSiteResumeID']:
                    js['websiteresumeid'] = js['WebSiteResumeID'].replace(
                        'J', '')
                else:
                    js['websiteresumeid'] = ''
            else:
                js['websiteresumeid'] = (resumeid if resumeid
                                         else js['WebSiteResumeID'])
            js['jobid'] = jobid
            js['source'] = source
            js['force'] = force
            js['matching'] = matching
            js['apply_job_id'] = 0
            js['apply_time'] = postdate

            (res, message, new) = AssembelResumeByJson(js)
            if not res:
                ResumeRedisCli.hincrby(importkey, 'failure')
                logger.error(u'<{}>简历<{}, {}>解析失败,原因:{}'.format(
                    sitename, username, resumeid, message))
                return

            logger.info(u'<{}>简历<{}, {}>解析成功!'.format(
                sitename, username, resumeid))
            imphistoryid = ResumeRedisCli.hget(importkey, 'imphistoryid')
            if new:
                ResumeRedisCli.hincrby(importkey, 'success')
                # Store the per-resume history detail
                if ImpHistoryResume.newAndSave(imphistoryid, companyid,
                                               message):
                    ImpHistory.incSuccessByHistoryId(imphistoryid)
                else:
                    logger.error(u'<{}>简历<{}, {}>历史详情保存异常!'.format(
                        sitename, username, resumeid))
            else:
                # An existing resume was refreshed; nothing new to add.
                ResumeRedisCli.hincrby(importkey, 'ignore')
        except BaseException as e:
            # NOTE(review): every failure, including a missing required key,
            # re-queues the job with no retry limit -- confirm intended.
            dispatch('zpb.service.resumeservice.ParseResume',
                     checkstatservice, **data)
            logger.error(u'简历解析服务异常,message:{}'.format(e))
    finally:
        dispatch(checkstatservice, taskid)
示例#6
0
def ParseLocalResume(companyid, taskid, importid):
    """Parse a locally supplied resume (uploaded file or pasted text).

    Loads the ``ImpLocalFile`` row for ``importid`` and does nothing when it
    does not exist. Otherwise an ``ImpHistory`` record is created (returning
    early if it cannot be saved), the content is sent to the resume parser
    and the result is assembled with ``AssembelResumeByJson``.

    On success the row gets the resume code and ``proc_status = 20``, and
    the history gets ``succ_num = 1`` / ``proc_status = 1``. On any failure
    the history gets ``fail_num = 1`` / ``proc_status = 2`` together with a
    failure reason. The row and the history are saved in every case.

    :param companyid: not used by this function; the row's ``company_id``
        is used instead.
    :param taskid: not used by this function.
    :param importid: id of the ``ImpLocalFile`` row to parse.
    """
    row = ImpLocalFile.queryByImportId(importid)
    if not row:
        return

    sitename = SiteConfig.getSiteNameById(row.from_site_id)
    logger.info(u'开始解析<{}>的本地简历'.format(sitename))
    row.proc_status = 10
    imp = ImpHistory.new(row.company_id, row.from_site_id, row.import_id,
                         row.input_type)
    if row.input_type == 1:
        imp.src_memo = row.user_file_name
    if not imp.save():
        return

    # Prefix used in every log message below
    log_msg = u''
    failed = False
    fail_reason = None
    try:
        if row.input_type == 1:
            log_msg = u'简历文件<{}>解析'.format(row.user_file_name)
            # The file content is handed to the parser base64-encoded.
            # NOTE(review): the previous comment said the database stores
            # the content hex-encoded and that it is decoded here; the code
            # only base64-encodes it -- confirm which is intended.
            content = base64.b64encode(row.file_content)
            ext = os.path.splitext(row.user_file_name)[-1]
            ret = _doResumeParseByFile(content, ext)
        else:
            log_msg = u'简历文本解析'
            content = row.input_content
            ret = _doResumeParseByString(content)

        if not ret:
            failed, fail_reason = True, u'解析结果为空'
        else:
            js = json.loads(ret)
            if not (js['Type'] > 0 and js['Name']):
                failed, fail_reason = True, u'简历内容为空'
            else:
                js['companyid'] = row.company_id
                js['siteid'] = row.from_site_id
                if js['WebSiteResumeID']:
                    js['websiteresumeid'] = js['WebSiteResumeID']
                else:
                    js['websiteresumeid'] = 'Local{0}'.format(row.import_id)
                js['jobid'] = ''
                js['source'] = 0
                js['apply_job_id'] = row.apply_job_id
                js['apply_time'] = datetime2str(datetime.today())
                (res, message, new) = AssembelResumeByJson(js)
                if not res:
                    failed, fail_reason = True, message
                else:
                    row.resume_code = message
                    row.proc_status = 20
                    # Store the per-resume history detail
                    if ImpHistoryResume.newAndSave(imp.history_id,
                                                   row.company_id, message):
                        imp.succ_num = 1
                        imp.proc_status = 1
                        logger.info(u'{}成功'.format(log_msg))
                    else:
                        failed, fail_reason = True, u'数据存储失败'

        if failed:
            # The template must be unicode like every other message here: on
            # Python 2 a byte-string template raises UnicodeEncodeError for
            # the non-ASCII arguments, which used to replace the real reason
            # with the generic internal-error text below.
            logger.error(u'{}失败,{}'.format(log_msg, fail_reason))
    except BaseException as e:
        logger.error(u'{}异常,原因:{}'.format(log_msg, e))
        failed, fail_reason = True, u'内部服务错误!'

    if failed:
        imp.fail_num = 1
        imp.proc_status = 2
        imp.fail_reason = fail_reason

    # Persist the outcome
    row.save()
    imp.end_time = datetime.today()
    imp.save()
示例#7
0
def pull_email(emailconf, companyid, taskid, importid, syncid):
    """Download the not-yet-seen mails of a mailbox and start the import.

    Connects through ``pop3(...)``. When no connection is returned and an
    error message is available, the mailbox configuration is marked invalid
    (``is_valid = 'F'``) with that message and saved.

    With a connection, the UIDL listing is filtered against the Redis set of
    already downloaded mails. If anything is left, an ``ImpHistory`` record
    is created, the task's counter hash is initialised in Redis, every
    pending mail is passed to ``download_email``, the ``finish`` flag is
    incremented and the ``CheckEmailImportStat`` job is dispatched. The
    connection is always closed with ``quit()``.

    :param emailconf: mailbox configuration; the POP3 settings,
        ``email_user``, ``company_id`` and ``import_id`` are read from it.
    :param companyid: company id stored in the counter hash.
    :param taskid: task identifier used to build the Redis hash key.
    :param importid: import source id stored in the counter hash.
    :param syncid: sync task id stored in the counter hash.
    """
    pop, error_message = pop3(emailconf.pop3_host, emailconf.pop3_port,
                              emailconf.email_user, emailconf.email_password,
                              emailconf.is_ssl == 'T')
    if not pop:
        if error_message:
            emailconf.is_valid = 'F'
            emailconf.import_memo = error_message
            emailconf.save()
        return

    try:
        try:
            _resp, uidls, _octets = pop.uidl()
        except error_proto as e:
            # Both fields use automatic numbering: mixing '{}' with '{0}'
            # makes str.format raise ValueError instead of logging.
            logger.error(u'[-] 获取邮箱<{}>状态失败,原因:{}'.format(
                emailconf.email_user, e))
            return

        if not uidls:
            logger.info(u'[-] 邮箱<{}>没有任何邮件!'.format(emailconf.email_user))
            return

        # Drop the mails that were already downloaded
        setkey = MAIL_SET_KEY % emailconf.email_user
        msgs = []
        for item in uidls:
            mid, uidl = item.split()
            if not MailRedisCli.sismember(setkey, uidl):
                msgs.append((mid, uidl))

        if not msgs:
            logger.info(u'[+] 邮箱<{}>没有未读邮件!'.format(
                emailconf.email_user))
            return

        logger.info(u'[+] 邮箱<{}>待下载 {} 封未读邮件...'.format(
            emailconf.email_user, len(msgs)))
        imp = ImpHistory.new(emailconf.company_id, 0,
                             emailconf.import_id, 3)
        imp.src_memo = emailconf.email_user
        if not imp.save():
            return

        key = Conf.RESUME_IMPORT_HKEY % taskid
        ResumeRedisCli.hmset(
            key,
            {
                'total': 0,
                'grab': 0,
                'success': 0,
                'ignore': 0,
                'failure': 0,
                'finish': 0,
                'siteid': emailconf.email_user,  # mailbox address
                'importid': importid,  # source id, kept for traceability
                'companyid': companyid,
                # needed later when imp_history_resume rows are stored
                'imphistoryid': imp.history_id,
                'syncid': syncid
            })
        for mid, uidl in msgs:
            download_email(pop, emailconf, mid, uidl, taskid)
        # Mark the producer side as done, then trigger the completion check.
        ResumeRedisCli.hincrby(key, 'finish')
        # ``async`` is a reserved word since Python 3.7, so the attribute
        # is looked up by name instead of written as ``dtc.async``.
        getattr(dtc, 'async')(
            'zpb.service.stateservice.CheckEmailImportStat', taskid)
        logger.info(u'[+] 邮箱<{}>已下载 {} 封未读邮件!'.format(
            emailconf.email_user, len(msgs)))
    finally:
        pop.quit()