def DoResumePayDown(companyid, siteid, syncid, fromsitecode, ownpay, taskid):
    """Download one paid resume and write the outcome back to its sync record.

    :param companyid: company requesting the download
    :param siteid: recruiting-site id used to resolve the task handler
    :param syncid: id of the ``ResSyncDistribute`` record to update
    :param fromsitecode: resume identifier on the source site
    :param ownpay: ``'T'`` to use the company's own account; any other value
        uses the shared account registered under company id 0
    :param taskid: task id handed to the handler factory

    On success the record gets ``sync_status`` 2010; on a handler failure it
    gets status 10 plus the handler message.  A ``BaseError`` raised anywhere
    in the flow is recorded as a failed attempt instead of propagating.
    """
    try:
        # The shared (public) account is registered under company id 0.
        account_owner = companyid if ownpay == 'T' else 0
        handler = SiteConfig.GetTaskHandler(account_owner, siteid, taskid)

        # check_status 10 marks an account whose login did not verify.
        if handler.bind.check_status == 10:
            raise CompanyAccountInvalidError(companyid, siteid, u'账号登录失败')

        try:
            handler.resume_searcher_down(fromsitecode, syncid, companyid)

            # Write the task state back.
            record = ResSyncDistribute.queryWithId(syncid)
            succeeded = HANDLE_STATUS.SUCCESS == handler.status
            if not succeeded:
                record.sync_status = 10
                record.error_message = handler.message
                logger.error(u'<{}>下载付费简历<{}>失败,原因:{}'.format(
                    handler.name, fromsitecode, handler.message))
            else:
                record.sync_status = 2010
                logger.info(u'<{}>下载付费简历<{}>成功!'.format(
                    handler.name, fromsitecode, handler.message))
            record.save()
        except BaseException as err:
            # Anything unexpected is wrapped so the outer handler records it.
            raise UnHandleRuntimeError(err)
    except BaseError as err:
        # NOTE(review): this path loads DBTask while the success path uses
        # ResSyncDistribute for the same syncid -- confirm this is intended.
        failed = DBTask.queryWithId(syncid)
        failed.sync_status = 10
        failed.fail_num += 1
        failed.log_info = err.message
        failed.save()
def get(self):
    """Verify a bound site account by logging in and report the result as JSON.

    Reads ``company_id`` and ``site_id`` from the request.  The JSON reply
    carries ``status`` 100 when the login succeeded (a follow-up schedule is
    then created via ``DBTask.newSchedule``); otherwise ``status`` stays 1
    and ``message`` carries the reason.

    Parameter validation happens inside the ``BaseError`` handler scope so
    that an invalid request produces its message in the reply rather than
    escaping the handler.  The reply is always written, even on errors.
    """
    ret = {'status': 1, 'message': ''}
    try:
        try:
            company_id = str2int(self.get_argument('company_id', ''))
            site_id = str2int(self.get_argument('site_id', ''))
            if not (company_id and site_id):
                raise InvalidParamError(u'提交参数错误!')
            handler = SiteConfig.GetTaskHandler(company_id, site_id, None)
            try:
                # Start the login.
                handler.login()
                # Report the result.
                if handler.status == HANDLE_STATUS.SUCCESS:
                    ret['status'] = 100
                    # Queue the follow-up polling schedule.
                    DBTask.newSchedule(company_id, site_id)
                else:
                    ret['message'] = handler.message
            except BaseException as e:
                # Log first: a statement placed after ``raise`` never runs.
                logger.error(u'Web服务请求异常,原因:{}'.format(e))
                raise UnHandleRuntimeError(e)
        except BaseError as e:
            ret['message'] = e.message
    finally:
        logger.debug(ret)
        self.write(json.dumps(ret))
def str2datetime(instr, format='%Y-%m-%d %H:%M:%S'):
    """Parse *instr* into a ``datetime``, falling back to the current time.

    Slashes in *instr* are replaced by dashes before parsing, so both
    ``2020/01/02 ...`` and ``2020-01-02 ...`` match the default format.

    :param instr: text to parse; an empty or ``None`` value yields "now"
    :param format: ``strptime`` format string
    :return: the parsed ``datetime``, or ``datetime.today()`` when *instr* is
        empty or cannot be parsed (the failure is logged)
    """
    if not instr:
        return datetime.today()
    try:
        return datetime.strptime(instr.replace('/', '-'), format)
    except Exception as e:
        # Only ordinary errors are absorbed; KeyboardInterrupt and SystemExit
        # must not be turned into a silently wrong timestamp.
        logger.error(u'时间格式化异常,message:<{}>'.format(e))
        return datetime.today()
def encryptBindPasswd(salt, original):
    """Encrypt *original* with DES-ECB and return it base64-encoded.

    The key is the first 8 bytes of ``"<salt>-renSHIyi.com"``.  The plaintext
    is padded with ``pad_it`` before encryption.

    :param salt: value mixed into the key
    :param original: plaintext to encrypt
    :return: base64 text of the ciphertext, or ``''`` when encryption fails
        (the failure is logged)
    """
    enkey = b'{0}-renSHIyi.com'.format(salt)
    enkey = enkey[0:8]
    try:
        # ECB takes no IV, so none is generated, and the plaintext is
        # encrypted exactly once.
        generator = DES.new(enkey, DES.MODE_ECB)
        return b64encode(generator.encrypt(pad_it(original)))
    except BaseException as e:
        logger.error(u'数据加密失败,原始串:{},原因:{}'.format(original, e))
        return ''
def _getHyperLink(itemname):
    """Look up the encyclopedia redirect target for *itemname*.

    Opens ``baikurl`` formatted with the URL-quoted item name and, when the
    response code is 302, returns its ``Location`` header.

    :return: the redirect location, or ``None`` for any other response code
        or when the request fails (the failure is logged)
    """
    response = None
    try:
        lookup_url = baikurl.format(quote(itemname))
        response = http_opener.open(lookup_url)
        if response.code != 302:
            return None
        return response.headers.get('Location')
    except BaseException as err:
        logger.error(u'<{}>百科超链接查询失败,原因:{}'.format(itemname, str(err)))
    finally:
        # Always release the connection, whatever the outcome.
        if response:
            response.close()
def decryptBindPasswd(salt, ciphering):
    """Decrypt a base64 DES-ECB ciphertext produced with the same salt.

    The key is the first 8 bytes of ``"<salt>-renSHIyi.com"``.  Trailing NUL
    bytes are stripped from the decrypted text.

    :param salt: value mixed into the key
    :param ciphering: base64-encoded ciphertext
    :return: the plaintext, or ``''`` when decoding/decryption fails (the
        failure is logged)
    """
    enkey = b'{0}-renSHIyi.com'.format(salt)
    enkey = enkey[0:8]
    try:
        passwdcrypt = b64decode(ciphering)
        # ECB takes no IV, so none is generated.
        generator = DES.new(enkey, DES.MODE_ECB)
        return generator.decrypt(passwdcrypt).rstrip('\x00')
    except BaseException as e:
        logger.error(u'数据解密失败,加密串:{},原因:{}'.format(ciphering, e))
        return ''
def GetRedis(Self, redisName):
    """Return a ``StrictRedis`` client for *redisName* after a ping check.

    :param redisName: name of the configured redis server / connection pool
    :return: a client bound to that server's connection pool
    :raises BaseException: when the ping reports failure (also logged) or
        when the connection cannot be established
    """
    client = redis.StrictRedis(connection_pool=Self.__GetPool(redisName))
    try:
        reachable = client.ping()
    except redis.exceptions.ConnectionError as err:
        raise BaseException(
            u' redis服务器({})连接失败,原因:{}'.format(redisName, str(err)))
    if reachable:
        return client
    error_msg = u'redis服务器({})ping失败'.format(redisName)
    logger.error(error_msg)
    raise BaseException(error_msg)
def _doResumeParseByFile(b64content, ext):
    """Send a base64-encoded resume file to the SOAP parsing service.

    :param b64content: base64 text of the file body
    :param ext: file extension passed through to the service
    :return: whatever ``TransResumeByJsonStringForFileBase64`` returns, or
        ``None`` when the service cannot be reached or the call fails (both
        cases are logged)
    """
    try:
        client = SudsClient(Conf.YGYS['soapuri'], timeout=Conf.YGYS['timeout'])
    except BaseException as err:
        logger.error(u'无法连接简历解析SOAP服务,uri:<{}>,e:<{}>'.format(
            Conf.YGYS['soapuri'], err))
        return None

    try:
        parse = client.service.TransResumeByJsonStringForFileBase64
        return parse(Conf.YGYS['username'], Conf.YGYS['password'],
                     b64content, ext)
    except BaseException as err:
        logger.error(u'简历文件解析失败,原因:{}'.format(err))
    return None
def post(self):
    """Bind a recruiting-site account to a company and report the result.

    Request arguments: ``site_id``, ``member_name``, ``login_name``,
    ``login_pswd`` (encrypted) and ``sessionid``.  The session id is
    decrypted into two ``:``-separated fields, the first being the company
    id.  The account is verified with ``handler.innerlogin``; on success the
    binding is stored and a schedule is queued.

    The JSON reply has ``status`` 0 on success and 1 otherwise, with
    ``message`` explaining the outcome.  The reply is always written.
    """
    ret = {'status': 1, 'message': ''}
    try:
        site_id = str2int(self.get_argument('site_id', ''))
        member_name = self.get_argument('member_name', '')
        login_name = self.get_argument('login_name', '')
        login_pswd = self.get_argument('login_pswd', '')
        # Decode URL-quoted Chinese text.
        member_name = unquote(member_name.encode('gbk'))
        login_name = unquote(login_name.encode('gbk'))
        session = self.get_argument('sessionid', '')
        try:
            if not session:
                raise InvalidParamError(u'非法的请求!')
            if not (site_id and login_name and login_pswd):
                raise InvalidParamError(u'非法的请求!')
            messages = decryptBindPasswd('zpb', session).split(':')
            if len(messages) != 2:
                raise InvalidParamError(u'非法的请求!')
            companyid = messages[0]
            handler = SiteConfig.GetTaskHandler(0, site_id, None)
            # Decrypt the login password.
            password = decryptBindPasswd(login_name, login_pswd)
            if not password:
                raise InvalidParamError(u'登录账号或密码错误!')
            try:
                # Start the login.
                (res, message) = handler.innerlogin(member_name, login_name,
                                                    password)
                # Report the result.
                if res:
                    ret['status'] = 0
                    if Bind.newAndSave(companyid, site_id, member_name,
                                       login_name, login_pswd):
                        ret['message'] = u'账号绑定成功!'
                    else:
                        ret['message'] = u'账号已绑定!'
                    DBTask.newSchedule(companyid, site_id)
                else:
                    ret['message'] = message
            except BaseException as e:
                # Log first: a statement placed after ``raise`` never runs.
                logger.error(u'Web服务请求异常,原因:{}'.format(e))
                raise UnHandleRuntimeError(e)
        except BaseError as e:
            ret['message'] = e.message
    finally:
        logger.debug(ret)
        self.write(json.dumps(ret))
def DownResume(checkstatservice, **kwargs): data = kwargs.copy() taskid = data['taskid'] companyid = data['companyid'] siteid = data['siteid'] username = data['username'] resumeid = data['resumeid'] postdate = data['postdate'] # 强制刷新简历(用於付费简历下载) force = data.get('force', False) # sitename = SiteConfig.getSiteNameById(siteid) importkey = Conf.RESUME_IMPORT_HKEY % taskid try: try: # 简历更新度验证(投递日期) if not force: if not ResumeBase.isNew(companyid, siteid, resumeid, postdate): # 未更新的简历将会被忽略 logger.debug(u'<{}>简历<{}, {}>重复下载!'.format( sitename, username, resumeid)) ResumeRedisCli.hincrby(importkey, 'ignore') return try: handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid) logger.info(u'正在下载<{}>简历<{}, {}>'.format( sitename, username, resumeid)) handler.resume_down(data) if handler.status == HANDLE_STATUS.SUCCESS: logger.info(handler.message) dtc. async ('zpb.service.resumeservice.ParseResume', checkstatservice, **data), elif handler.status == HANDLE_STATUS.AGAIN: logger.info(u'<{}>简历<{}, {}>需要重新下载'.format( sitename, username, resumeid)) data['retry'] = data.get('retry', 0) + 1 dtc. async ('zpb.service.resumeservice.DownResume', checkstatservice, **data), else: ResumeRedisCli.hincrby(importkey, 'failure') logger.error(handler.message) except BaseError as e: pass except BaseException as e: dtc. async ('zpb.service.resumeservice.DownResume', checkstatservice, **data), logger.error(u'<{}>简历<{}, {}>下载异常,原因:{}'.format( sitename, username, resumeid, e)) finally: dtc. async (checkstatservice, taskid)
def Do20(companyid, siteid, syncid, taskid, **kwargs): task = DBTask.queryWithId(syncid) try: handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid) # 账号已解除绑定 if handler.bind.is_valid != 'T': DBTask.newSchedule(companyid, siteid, False) raise CompanyAccountUnBindError(companyid, siteid, u'账号已解除绑定') # 账号未验证通过 if handler.bind.check_status == 10: raise CompanyAccountInvalidError(companyid, siteid, u'账号登录失败') try: handler.position_import() if HANDLE_STATUS.SUCCESS == handler.status: task.sync_status = 2000 task.succ_num += 1 task.log_info = handler.message # 账号绑定 handler.bind.check_status = 50 handler.bind.login_result = '登录成功' handler.bind.last_succ_time = datetime.today() handler.bind.save() # 下发其它异步任务 infouid = md5(taskid + 'info') dtc. async ('zpb.service.handleservice.DoInfo', *(companyid, siteid, infouid), uid=infouid) resuid = md5(taskid + '201') dtc. async ('zpb.service.handleservice.Do201', *(companyid, siteid, syncid, resuid), uid=resuid) logger.info(handler.message) elif HANDLE_STATUS.AGAIN == handler.status: task.sync_status = 11 task.fail_num += 1 task.log_info = handler.message logger.error(handler.message) else: task.sync_status = 10 task.fail_num += 1 task.log_info = handler.message logger.error(handler.message) except BaseException as e: raise UnHandleRuntimeError(e) except BaseError as e: task.sync_status = 10 task.fail_num += 1 task.log_info = e.message task.save()
def execute(self, dbname, command):
    """Run one SQL statement on *dbname* and commit it.

    :param dbname: name of the configured database connection
    :param command: SQL text to execute

    On any ``Exception`` the error is logged, the transaction is rolled back
    (when a connection was obtained) and the traceback is printed.
    NOTE(review): the affected row count is stored in ``res`` but nothing is
    returned in the code visible here -- confirm against the full file.
    """
    conn = cur = res = None
    try:
        conn = self.getConn(dbname)
        cur = conn.cursor()
        cur.execute(command)
        res = cur._cursor.rowcount
        conn.commit()
    except Exception as e:
        detail = u'execute error:%s,cause:%s' % (command, str(e))
        logger.error(detail)
        if conn:
            conn.rollback()
        traceback.print_exc()
def testConn(self, dbname):
    """Probe the *dbname* connection by running ``select 1``.

    Sets the local ``res`` flag to True when the probe query completes and
    stores the exception in ``msg`` when it fails (the failure is logged and
    the traceback printed).
    NOTE(review): nothing is returned in the code visible here -- confirm
    against the full file.
    """
    conn = cur = None
    res = False
    msg = ""
    try:
        conn = self.getConn(dbname)
        cur = conn.cursor()
        cur.execute("select 1")
        # The rows themselves are irrelevant; fetching proves the round trip.
        cur.fetchall()
        res = True
    except Exception as e:
        logger.error(u'testConn error:' + str(e))
        traceback.print_exc()
        msg = e
def guard(self):
    """Watch the dispatcher process and respawn it until asked to stop.

    Signals ``start_event``, then polls every half second: when the
    dispatcher is no longer alive a new one is spawned and the event is
    logged.  Once ``stop_event`` is set the loop ends and ``stop()`` runs.
    """
    guard_name = current_process().name
    logger.info('{} guarding Application at {}'.format(guard_name, self.pid))
    self.start_event.set()
    logger.info('Application-{} running.'.format(self.parent_pid))

    poll_seconds = 0.5
    while not self.stop_event.is_set():
        if not self.dispatcher.is_alive():
            self.dispatcher = self.spawn_dispatcher()
            logger.error(
                'reincarnated dispatcher {} after sudden death'.format(
                    self.dispatcher.name))
        sleep(poll_seconds)
    self.stop()
def download_email(pop, emailconf, mid, uidl, taskid):
    """Retrieve message *mid* over POP3, parse it and remember its UIDL.

    :param pop: connected POP3 client
    :param emailconf: mailbox configuration (``email_user`` is read)
    :param mid: message number to retrieve
    :param uidl: unique id of the message, added to the mailbox's redis set
        after a successful parse
    :param taskid: task id passed through to ``parse_email``

    A protocol error during retrieval is logged and ends the call; a parse
    failure is logged and the UIDL is not recorded.
    """
    logger.info(u'[*] 正在下载邮箱<{}>第<{}>封邮件'.format(emailconf.email_user, mid))
    try:
        _status, lines, _size = pop.retr(mid)
        message = email.message_from_string('\n'.join(lines))
    except error_proto as err:
        logger.error(u'[*] 邮箱<{}>中的第<{}>封邮件下载失败,原因:{}'.format(
            emailconf.email_user, mid, err))
        return

    # Parse and store the content/attachments, then mark the mail as seen.
    try:
        parse_email(emailconf, message, mid, taskid)
        MailRedisCli.sadd(MAIL_SET_KEY % emailconf.email_user, uidl)
    except BaseException as err:
        logger.error(u'[*] 邮箱<{}>中的第<{}>封邮件解析失败,原因:{}'.format(
            emailconf.email_user, mid, err))
def call(self, data=None):
    """Send this request and wrap the outcome in an ``XResponse``.

    :param data: optional request body; a ``dict`` is URL-encoded, a string
        is attached as is, any other non-``None`` value is ignored
    :return: an ``XResponse`` with ``status`` ``HTTP_STATUS.SUCCESS`` on
        success; on any ``Exception`` an ``XResponse`` built without a
        payload, with ``status`` ``HTTP_STATUS.FAILURE`` and ``msg`` holding
        the URL and the traceback (also logged)
    """
    try:
        if data is not None:
            if isinstance(data, dict):
                self.add_data(urlencode(data))
            elif isinstance(data, basestring):
                self.add_data(data)
        ret = urllib2.urlopen(self)
        response = XResponse(ret, self)
        response.status = HTTP_STATUS.SUCCESS
        return response
    except Exception:
        # format_exc() formats the exception being handled; its only
        # positional parameter is a frame limit, not the exception object.
        msg = 'Http Request call error:{}\n{}\n'.format(self.url, format_exc())
        logger.error(msg)
        response = XResponse(None, self)
        response.status = HTTP_STATUS.FAILURE
        response.msg = msg
        return response
def mutiExecute(self, dbname, commandList):
    """Run several SQL statements on *dbname* inside one transaction.

    :param dbname: name of the configured database connection
    :param commandList: iterable of SQL statements, executed in order

    The row count of each statement is collected in ``res``.  If any
    statement fails, the failing SQL is logged, the whole batch is rolled
    back (when a connection was obtained) and the traceback is printed.
    NOTE(review): ``res`` and ``flag`` are not returned or used in the code
    visible here -- confirm against the full file.
    """
    conn = cur = None
    res = []
    flag = True
    sql = ''
    try:
        conn = self.getConn(dbname)
        cur = conn.cursor()
        for sql in commandList:
            cur.execute(sql)
            res.append(cur._cursor.rowcount)
        conn.commit()
    except Exception as e:
        # ``sql`` still names the statement that was running when it failed.
        logger.error(u'execute error:%s,cause:%s' % (sql, str(e)))
        if conn:
            conn.rollback()
        traceback.print_exc()
def dispatcher(event): def _async_task(tasks): # 分发异步任务 for task in tasks: taskname = task.get('name', False) if taskname: logger.debug(u'dispatcher task named <{}>'.format(taskname)) args = task.get('args', ()) kwargs = task.get('kwargs', {}) kwargs['uid'] = task['id'] dtc. async (task['func'], *args, **kwargs) # name = current_process().name pid = current_process().pid logger.info('{} dispatch tasks at {}'.format(name, pid)) while not event.is_set(): try: # 任务指令中心 task_set = DBTask.queryPending() or [] _async_task(task_set) # 手动简历解析 task_set = ImpLocalFile.queryPending() or [] _async_task(task_set) # 付费简历下载 task_set = ResSyncDistribute.queryPending() or [] _async_task(task_set) # 邮件搜索定时任务 task_set = EmailConf.queryPending() or [] _async_task(task_set) # 简历搜索定时任务 task_set = ResumeSearcher.queryPending() or [] _async_task(task_set) # 关键词百科超链接 task_set = HyperLink.queryPending() or [] _async_task(task_set) # sleep(2) except BaseException as e: logger.error(e) break logger.info('{} stopped dispatch tasks'.format(name))
def PackPositionDown(grab, job):
    """Fill *job* from the grabbed position using the site-specific packer.

    :param grab: grabbed position; ``siteid`` (1-7) selects the packer module
    :param job: job object populated by the packer
    :return: ``True`` in every case; a site id outside 1-7 does nothing
    """
    try:
        packers = {
            1: wyjob,
            2: rd2,
            3: lag,
            4: cjol,
            5: wuba,
            6: ganj,
            7: liep,
        }
        packer = packers.get(grab.siteid)
        if packer is not None:
            packer.pack_position(grab, job)
        return True
    except BaseException as err:
        logger.error(u'<{}>下载职位<{}>失败,原因:<{}>'.format(
            SiteConfig.getSiteNameById(grab.siteid), grab.jobid, err))
        # NOTE(review): failure also reports True -- confirm callers rely on
        # this before changing it.
        return True
def pack_position(self, grab, job):
    """Copy a grabbed position (Lagou dictionaries) onto the *job* model.

    :param grab: grabbed position record (source of all values)
    :param job: job model that receives the mapped fields

    Maps identity fields, job type levels, the fuzzy-matched work city, an
    average-salary bucket, education/experience limits, receiver e-mails and
    the cleaned job description.
    """
    # Identity and status.
    job.company_id = grab.companyid
    job.from_site_id = grab.siteid
    job.from_site_code = grab.jobid
    job.job_name = grab.jobtitle
    if grab.worktype == u'全职':
        job.job_mode = 1
    else:
        job.job_mode = 2
    job.job_status = 10
    if grab.status == 10:
        job.is_valid = 'T'
    else:
        job.is_valid = 'F'
    job.start_date = datetime.date(grab.datepublish)
    job.publish_date = grab.datepublish

    # Job type: the more specific category wins when present.
    position_type = grab.positiontype2 or grab.positiontype
    if position_type:
        job.job_type_level_3 = dict_info('LAG:JobType', position_type)
        job.job_type_level_2 = int(job.job_type_level_3[0: -2])
        job.job_type_level_1 = dict_info('SubJobType', job.job_type_level_2)

    # Fuzzy-match the work city.
    citykey = blur_key('Common:City', grab.workarea)
    if not citykey:
        job.location_province_id = 4241
        logger.error(u'拉勾职位工作城市{0}没有模糊匹配结果!'.format(grab.workarea))
    else:
        cityinfo = json.loads(dict_info('Common:City', citykey))
        job.location_city_id = cityinfo['cityid']
        job.location_province_id = cityinfo['provinceid']
    job.location_detail = ''

    # Salary range: bucket by the average of min and max.
    salary_total = grab.salarymin * 1000.00 + grab.salarymax * 1000.00
    avgsalary = int(round(salary_total) / 2)
    if avgsalary > 0:
        job.salary_id = getSalaryIdBySalaryValue(avgsalary)
    job.recruit_num = 0

    # Requirement limits, with dictionary defaults when no mapping exists.
    job.lmt_edu_id = dict_info('LAG:Degree', grab.education, '0')
    job.lmt_work = dict_info('LAG:WorkYear', grab.workexperience, '371006')
    job.lmt_sex_ids = '391012'
    job.receiver_emails = grab.emails.replace(',', ';')
    job.work_responsibility = clearHtmlTag(grab.jobdesc.strip("'"))
    job.last_sync_time = datetime.today()
def callReConn(self, dbname):
    """Rebuild the database manager registered for *dbname*.

    :param dbname: name of a configured database connection
    :return: ``True`` when a new ``TQDbManager`` was created and registered,
        ``False`` when that failed; ``None`` when *dbname* is not configured
        or has no registered manager (as far as the visible code shows)
    """
    ret = False
    if dbname in DB_CONFIG:
        if dbname in self.__DbManagerDict:
            try:
                print("%s: now try to reconnect Database(%s)!" % (
                    datetime.datetime.now(), dbname))
                fresh_manager = TQDbManager(dbname)
                self.__DbManagerDict[dbname] = fresh_manager
                print(u"%s reconnect database(%s) success!" % (
                    datetime.datetime.now(), dbname))
                ret = True
            except Exception as e:
                logger.error(u'callReConn error:' + dbname + ' cause:' + str(e))
                traceback.print_exc()
                print(u"%s reconnect database(%s) failed!" % (
                    datetime.datetime.now(), dbname))
            finally:
                return ret
def Do201(companyid, siteid, syncid, taskid, **kwargs):
    """Import resumes for a bound account and record the task outcome.

    :param companyid: company owning the bound account
    :param siteid: recruiting-site id
    :param syncid: id of the ``DBTask`` row to update; also passed to the
        handler's ``resume_import``
    :param taskid: task id handed to the handler factory

    Success sets status 20; an AGAIN status sets 11; any other failure or a
    ``BaseError`` sets 10.  The task row is always saved.
    """
    task = DBTask.queryWithId(syncid)
    try:
        handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
        # The account has been unbound.
        if handler.bind.is_valid != 'T':
            DBTask.newSchedule(companyid, siteid, False)
            raise CompanyAccountUnBindError(companyid, siteid, u'账号已解除绑定')
        # The account did not pass verification.
        if handler.bind.check_status == 10:
            raise CompanyAccountInvalidError(companyid, siteid, u'账号登录失败')

        try:
            handler.resume_import(syncid)
        except BaseException as err:
            raise UnHandleRuntimeError(err)

        if HANDLE_STATUS.SUCCESS == handler.status:
            task.succ_num += 1
            task.log_info = handler.message
            task.sync_status = 20
            # Disabled follow-up dispatch, kept for reference:
            # dtc.async('zpb.service.handleservice.DoInfo', *(companyid, siteid, taskid))
            logger.info(handler.message)
        else:
            task.fail_num += 1
            task.log_info = handler.message
            retryable = HANDLE_STATUS.AGAIN == handler.status
            task.sync_status = 11 if retryable else 10
            logger.error(handler.message)
    except BaseError as err:
        task.sync_status = 10
        task.fail_num += 1
        task.log_info = err.message
    task.save()
def pop3(host, port, username, password, usessl):
    """Open a POP3 session and log in.

    :param host: POP3 server host name
    :param port: server port; a falsy value selects the protocol default
    :param username: mailbox account
    :param password: mailbox password
    :param usessl: truthy to connect with ``POP3_SSL``
    :return: ``(connection, '')`` on success; ``(None, message)`` when the
        credentials are rejected or the host name cannot be resolved;
        ``(None, None)`` for any other failure (possibly a transient network
        problem, so the caller may retry)
    """
    try:
        pop_class = poplib.POP3_SSL if usessl else poplib.POP3
        # Honour the configured port instead of always using the default.
        conn = pop_class(host, port) if port else pop_class(host)
        if debug_mode:
            conn.set_debuglevel(2)
        conn.user(username)
        conn.pass_(password)
        logger.info(u'[+] 邮箱<{}> 登录成功'.format(username))
        return conn, ''
    except error_proto:
        error_message = u'登录账号或密码错误'
        # The format string needs a placeholder for the reason to be logged.
        logger.error(u'[+] 邮箱<{}> 登录失败,原因:{}'.format(username, error_message))
        return None, error_message
    except socket.gaierror:
        error_message = u'非法POP3服务器名'
        logger.error(u'[+] 邮箱<{}> 登录失败,原因:<{}>为{}'.format(
            username, host, error_message))
        return None, error_message
    except BaseException as e:
        # Possibly a network problem; the operation can be attempted again.
        logger.error(u'[+] 邮箱<{}> 登录失败,原因:{}'.format(username, str(e)))
        return None, None
def post(self):
    """Parse an uploaded resume file and reply with the stored resume data.

    Expects a multipart file named ``file`` plus optional ``sitefromcode``
    and ``maildate`` arguments.  The file is sent to the SOAP parsing
    service; a usable result is assembled into a resume record and exported.

    The JSON reply has ``status`` 0 with ``count`` and ``res_resume_base`` on
    success; otherwise ``status`` stays 1 and ``message`` explains why (that
    message is also logged as an error).  The reply is always written.
    """
    ret = {'status': 1, 'message': ''}
    try:
        # The upload must carry file metadata under name=file.
        if not self.request.files.has_key('file'):
            ret['message'] = u'未上传需要解析的简历文件!'
            return

        try:
            soapCli = SudsClient(Conf.YGYS['soapuri'],
                                 timeout=Conf.YGYS['timeout'])
        except BaseException as err:
            logger.error(err)
            ret['message'] = u'无法连接简历解析WebService服务'
            return

        try:
            sitecode = self.get_argument('sitefromcode', '0')
            maildate = self.get_argument('maildate', '')
            upload = self.request.files['file'][0]
            filename = upload['filename']
            logger.info(u'解析简历文件<{}>'.format(filename))
            # File extension, with a text fallback when there is none.
            ext = os.path.splitext(filename)[-1] or '.text'
            parsed = soapCli.service.TransResumeByJsonStringForFileBase64(
                Conf.YGYS['username'], Conf.YGYS['password'],
                base64.b64encode(upload['body']), ext)
            if not parsed:
                ret['message'] = u'解析结果空白!'
            else:
                js = json.loads(parsed)
                if js['Type'] == 0:
                    ret['message'] = js['Name']
                elif not js['Name']:
                    ret['message'] = u'非完整简历'
                else:
                    js.update({
                        'companyid': 0,
                        'jobid': 0,
                        'source': 0,
                        'apply_job_id': 0,
                        'siteid': sitecode,
                        'apply_time': maildate,
                        'websiteresumeid': js['WebSiteResumeID'],
                        'matching': 0,
                    })
                    (ok, message, new) = AssembelResumeByJson(js)
                    if not ok:
                        ret['message'] = message
                    else:
                        data = ResumeBase.queryAndExportByResumeCode(message)
                        ret['status'] = 0
                        ret['count'] = 1
                        ret['message'] = u'简历文件解析成功!'
                        ret['res_resume_base'] = data
        except BaseException as err:
            logger.error(err)
            ret['message'] = u'简历解析内部服务错误!'
    finally:
        if ret['status'] == 1:
            logger.error(ret['message'])
        self.write(json.dumps(ret))
def AssembelResumeByJson(js):
    """Persist a parsed resume (JSON dict) as a resume record plus detail rows.

    :param js: parsed resume dict returned by the parsing service, enriched
        by the caller with ``companyid``, ``siteid``, ``websiteresumeid``,
        ``jobid``, ``apply_job_id``, ``apply_time``, ``source`` and
        optionally ``force`` / ``matching``
    :return: ``(ok, detail, is_new)`` -- processing status, the resume code
        on success or the error reason on failure, and whether the resume
        was (re)written by this call
    """
    # Noise filter: reject records that carry no usable identity data.
    if js['Sex'] == '' and js['Age'] == '':
        if js['LastUpdate'] == '199001':
            return (False, u'非法的简历!', False)
    if js['siteid'] == 0:
        return (False, u'非法的简历!', False)
    session = DBInstance.session
    try:
        # Forced refresh (used for paid resume downloads).
        force = js.get('force', False)
        (ret, rm) = ResumeBase.queryAndCreate(session, js['companyid'], js['siteid'], js['websiteresumeid'], js['apply_time'])
        # Existing, unchanged resume and no forced refresh: nothing to write.
        if not ret and not force:
            return (True, rm.resume_code, False)
        # Fill in the resume.
        rm.apply_job_status = 0
        rm.apply_job_id = 0
        # Source-site job id linked to the resume; the latest value wins.
        if js['jobid']:
            rm.from_site_jobcode = js['jobid']
        if not rm.from_site_jobcode:
            rm.from_site_jobcode = ''
        # The source may name the local job id directly (imp_local_file).
        if js['apply_job_id']:
            rm.apply_job_id = js['apply_job_id']
            rm.apply_job_status = 1
        else:
            # Otherwise look up the local job id by the source-site job id.
            if rm.from_site_jobcode:
                jobid = JobSyncDistribute.queryJobIdByThirdJobCode(rm.company_id, rm.from_site_id, rm.from_site_jobcode)
                if jobid:
                    rm.apply_job_status = 1
                    rm.apply_job_id = jobid
        # Check the delivery time: an application whose apply_time is not
        # later than dateAdd(day=-7) is detached from the job again.
        if rm.apply_job_status == 1:
            if rm.apply_time <= dateAdd(day=-7):
                rm.pre_apply_job_id = rm.apply_job_id
                rm.apply_job_id = 0
                rm.apply_job_status = 0
        # Base attributes copied from the parsed JSON.
        rm.resume_type = js['Type']
        rm.resume_grade = js['ResumeGrade']
        rm.married_status = js['Married']
        rm.id_no = js['IDNO']
        rm.get_encouragement = js['Encouragement'].replace('\r\n', '<br />')
        rm.join_team = js['Team'].replace('\r\n', '<br />')
        rm.volunteer_info = js['Volunteer']
        rm.graduate_year = str2int(js['Graduatetime'][0: 4])
        rm.graduate_month = str2int(js['Graduatetime'][5: 7])
        rm.begin_work_year = str2int(js['Beginworktime'][0: 4])
        rm.begin_work_month = str2int(js['Beginworktime'][5: 7])
        rm.last_update = js['LastUpdate']
        rm.third_score = js['Score']
        rm.certificate_name = js['Certificate'].replace('\r\n', '<br />')
        rm.person_memo = js['Personal'].replace('\r\n', '<br />')
        rm.lesson_name = js['Lesson']
        rm.computer_level = js['Computer']
        rm.english_level = js['English']
        rm.graduate_school = js['School']
        rm.school_rankings = js['SchoolRankings']
        rm.addr_postcode = js['PostCode']
        rm.speciality_name = js['Speciality']
        rm.contact_addr = js['Address']
        rm.native_place = js['Jiguan']
        rm.national_name = js['National']
        rm.nationality_name = js['Nationality']
        rm.birth_day = js['Birth']
        # Resumes from the resume searcher (source 9) come back from the
        # parser with noisy names, so the name is blanked unless the stored
        # record already came from another source.
        if js['source'] == 9:
            if rm.source is None or rm.source == 9:
                rm.person_name = ''
                rm.family_name = ''
        else:
            rm.person_name = js['Name']
            rm.family_name = js['FamilyName']
        rm.person_href = js['Href']
        rm.hope_title = js['Title']
        rm.hope_title2 = js['Title2']
        rm.title_standard = js['TitleStandard']
        rm.aim_institution = js['AimInstitution']
        rm.person_age = str2int(js['Age'])
        rm.person_sex = js['Sex']
        rm.body_high = js['High']
        rm.body_weight = js['Weight']
        # Contact fields: only overwrite with non-empty values, never leave None.
        if js['Mobile']:
            rm.mobile_no = js['Mobile']
        if rm.mobile_no is None:
            rm.mobile_no = ''
        if js['Phone']:
            rm.other_phone = js['Phone']
        if rm.other_phone is None:
            rm.other_phone = ''
        rm.fax_no = js['Fax']
        if js['Email']:
            rm.email_addr = js['Email']
        if rm.email_addr is None:
            rm.email_addr = ''
        # A masked placeholder address is treated as "no e-mail".
        if rm.email_addr == '*****@*****.**':
            rm.email_addr = ''
        rm.now_location = js['NowLocation']
        rm.hope_location = js['Forwardlocation']
        rm.high_education = js['Education']
        # Normalise the traditional-character spelling before the id lookup.
        if rm.high_education == u'大專':
            rm.high_education = u'大专'
        rm.high_edu_id = getEduHighId(rm.high_education)
        rm.advance_degree = js['AdvancedDegree']
        rm.exp_name = ''
        rm.now_vocation = js['Vocation']
        rm.hope_vocation = js['ForwardVocation']
        rm.vocation_standard = js['VocationStandard']
        rm.now_salary = js['Salary']
        rm.hope_salary = js['AimSalary']
        rm.political_name = js['Political']
        rm.start_from = js['StartFrom']
        rm.apply_switch = js['Switch']
        rm.qq_no = js['QQ']
        rm.student_type = js['StudentType']
        rm.photo_url = js['PhotoUrl']
        rm.apply_letter = js['AppLetter']
        rm.last_company = js['LastCompany']
        rm.last_title = js['LastTitle']
        rm.overseas_work = js['OverseasWork']
        rm.job_hope_frequency = js['JobHoppingFrequency']
        rm.integrity_ratio = js['Integrity']
        rm.work_type = js['WorkType']
        rm.birth_date = getBirthDay(js['Birth'])
        rm.work_years = str2int(js['Experience'])
        rm.source = js.get('source', 0)
        rm.matching_degree = js.get('matching', 0)
        rm.is_valid = 'T'
        rm.modify_user = '******'
        rm.modify_time = datetime.today()
        session.add(rm)
        # Resume extension: original text and full-text detail sections.
        rext = ResumeExtend()
        rext.resume_code = rm.resume_code
        rext.org_resume = fmtHtml(js['Original'])
        rext.edu_detail_full = js['EducationDetail'].replace('\r\n', '<br />')
        rext.exp_detail_full = js['ExperienceDetail'].replace('\r\n', '<br />')
        rext.train_detail_full = js['Training'].replace('\r\n', '<br />')
        rext.proj_detail_full = js['Project'].replace('\r\n', '<br />')
        rext.skill_detail_full = js['Skill'].replace('\r\n', '<br />')
        rext.is_valid = 'T'
        rext.create_user = '******'
        rext.create_time = datetime.today()
        session.add(rext)
        # Education history rows.
        if js['EducationInfo']:
            for (idx, item) in enumerate(js['EducationInfo']):
                redu = ResumeEdu()
                redu.resume_code = rm.resume_code
                redu.start_date = item['StartDate']
                redu.end_date = item['EndDate']
                redu.school_name = item['School']
                redu.major_name = item['Speciality']
                redu.adv_degree = item['AdvancedDegree']
                redu.diplomas_name = item['Education']
                redu.depart_name = item['Department']
                redu.edu_summary = item['Summary'].replace('\r\n', '<br />')
                redu.is_studii = item['IsStudii']
                redu.is_valid = 'T'
                session.add(redu)
                # Asynchronously add the encyclopedia hyperlink for the school.
                dtc.async('zpb.business.model.hyperlink.AppendSchoolLink', redu.school_name)
        # Work experience rows.
        if js['ExperienceInfo']:
            for (idx, item) in enumerate(js['ExperienceInfo']):
                rexp = ResumeExp()
                rexp.resume_code = rm.resume_code
                rexp.start_date = item['StartDate']
                rexp.end_date = item['EndDate']
                rexp.periods_of_time = item['PeriodsOfTime']
                # The duration of the first listed job is also written to
                # the resume base record.
                if idx == 0:
                    rm.periods_of_time = rexp.periods_of_time
                rexp.company_name = item['Company']
                rexp.work_location = item['Location']
                rexp.vocation_name = item['Vocation']
                rexp.company_scale = item['Size']
                rexp.company_type = item['Type']
                rexp.depart_name = item['Department']
                rexp.work_title = item['Title']
                rexp.salary_name = item['Salary']
                rexp.work_summary = item['Summary'].replace('\r\n', '<br />')
                rexp.leader_name = item['Leader']
                rexp.underling_num = str2int(item['UnderlingNumber'])
                rexp.leaving_reason = item['ReasonOfLeaving']
                rexp.is_valid = 'T'
                session.add(rexp)
                # Asynchronously add the encyclopedia hyperlink for the company.
                dtc.async('zpb.business.model.hyperlink.AppendCompanyLink', rexp.company_name)
        # Training rows.
        if js['TrainingInfo']:
            for item in js['TrainingInfo']:
                rtra = ResumeTrain()
                rtra.resume_code = rm.resume_code
                rtra.start_date = item['StartDate']
                rtra.end_date = item['EndDate']
                rtra.train_institution = item['TrainingInstitution']
                rtra.train_location = item['TrainingLocation']
                rtra.train_course = item['TrainingCourse'].replace('\r\n', '<br />')
                rtra.train_certificate = item['Certificate']
                rtra.train_desc = item['DescriptionInDetails'].replace('\r\n', '<br />')
                rtra.is_valid = 'T'
                session.add(rtra)
        # Project rows.
        if js['ProjectInfo']:
            for item in js['ProjectInfo']:
                rpro = ResumeProject()
                rpro.resume_code = rm.resume_code
                rpro.start_date = item['StartDate']
                rpro.end_date = item['EndDate']
                rpro.project_name = item['ProjectName']
                rpro.work_title = item['Title']
                rpro.proj_desc = item['ProjectDescription'].replace('\r\n', '<br />')
                rpro.work_responsibility = item['Responsibilities'].replace('\r\n', '<br />')
                rpro.is_valid = 'T'
                session.add(rpro)
        # Language skill rows.
        if js['LanguagesSkills']:
            for item in js['LanguagesSkills']:
                rlan = ResumeLang()
                rlan.resume_code = rm.resume_code
                rlan.lang_name = item['Languages']
                rlan.listen_speak = item['ListeningSpeakingSkills']
                rlan.write_read = item['ReadingWritingSkills']
                rlan.lang_score = item['Score']
                rlan.lang_skill = item['Skills']
                rlan.is_valid = 'T'
                session.add(rlan)
        # IT skill rows.
        if js['ITSkills']:
            for item in js['ITSkills']:
                rit = ResumeIT()
                rit.resume_code = rm.resume_code
                rit.skill_name = item['SkillType']
                rit.use_time = item['TimeOfUse']
                rit.competency_level = item['CompetencyLevel']
                rit.is_valid = 'T'
                session.add(rit)
        # English certificate (a single object, stored only when named).
        if js['GradeOfEnglish']:
            item = js['GradeOfEnglish']
            if item['NameOfCertificate']:
                reng = ResumeEnglish()
                reng.resume_code = rm.resume_code
                reng.certificate_name = item['NameOfCertificate']
                reng.certificate_score = item['Score']
                reng.rec_date = item['ReceivingDate']
                reng.is_valid = 'T'
                session.add(reng)
        session.commit()
        # Read the code before the session is closed in ``finally``.
        resume_code = rm.resume_code
        return (True, resume_code, True)
    except BaseException as e:
        # Any failure discards everything added in this call.
        session.rollback()
        logger.error(u'简历解析失败,原因:{}'.format(e))
        return (False, u'简历解析失败!', False)
    finally:
        session.close()
def __init__(self, company, siteid, message):
    """Create the error with code 20010 and log it immediately.

    :param company: company the error relates to
    :param siteid: recruiting-site id the error relates to
    :param message: human-readable reason
    """
    super(InvalidParamError, self).__init__(company, siteid, message)
    # Set after the base initialiser so this class's code is the final value.
    self.code = 20010
    # The error logs itself on construction.
    logger.error(self)
def ParseResume(checkstatservice, **kwargs): data = kwargs.copy() taskid = data['taskid'] companyid = data['companyid'] siteid = data['siteid'] jobid = data['jobid'] source = data.get('source', 0) username = data['username'] resumeid = data['resumeid'] postdate = data['postdate'] # 强制刷新简历(用於付费简历下载) force = data.get('force', False) # 简历与职位匹配度 matching = data.get('matching', 0) # sitename = SiteConfig.getSiteNameById(siteid) importkey = Conf.RESUME_IMPORT_HKEY % taskid try: try: # 开始解析 logger.info(u'正在解析<{}>简历<{}>, <{}>'.format(sitename, username, resumeid)) filepath = data['filepath'] if os.path.isfile(filepath): # 用於文件备份 # basename = os.path.basename(filepath) # dirname = os.path.dirname(filepath) ext = os.path.splitext(filepath)[-1] ret = _doResumeParseByFile( base64.b64encode(open(filepath, 'rb').read()), ext) if ret: js = json.loads(ret) if js['Type'] == 0: ResumeRedisCli.hincrby(importkey, 'failure') logger.error(u'<{}>简历<{}, {}>解析失败,原因:{}'.format( sitename, username, resumeid, js['Name'])) return if not js['Name']: ResumeRedisCli.hincrby(importkey, 'failure') logger.error( u'<{}>简历<{}, {}>解析失败,原因:文件不是一份完整的简历!'.format( sitename, username, resumeid)) return # js['companyid'] = companyid js['siteid'] = siteid if siteid == 4: if resumeid: js['websiteresumeid'] = resumeid elif js['WebSiteResumeID']: js['websiteresumeid'] = js[ 'WebSiteResumeID'].replace('J', '') else: js['websiteresumeid'] = '' else: js['websiteresumeid'] = resumeid if resumeid else js[ 'WebSiteResumeID'] js['jobid'] = jobid js['source'] = source js['force'] = force js['matching'] = matching js['apply_job_id'] = 0 js['apply_time'] = postdate (res, message, new) = AssembelResumeByJson(js) if res: logger.info(u'<{}>简历<{}, {}>解析成功!'.format( sitename, username, resumeid)) # 简历存储 imphistoryid = ResumeRedisCli.hget( importkey, 'imphistoryid') # 简历刷新,不必新增 if new: ResumeRedisCli.hincrby(importkey, 'success') # 保存简历历史详细记录 if ImpHistoryResume.newAndSave( imphistoryid, companyid, message): 
ImpHistory.incSuccessByHistoryId(imphistoryid) else: logger.error(u'<{}>简历<{}, {}>历史详情保存异常!'.format( sitename, username, resumeid)) else: ResumeRedisCli.hincrby(importkey, 'ignore') else: ResumeRedisCli.hincrby(importkey, 'failure') logger.error(u'<{}>简历<{}, {}>解析失败,原因:{}'.format( sitename, username, resumeid, message)) else: ResumeRedisCli.hincrby(importkey, 'failure') message = u'简历服务器解析简历返回结果异常,<{}><{}, {}>'.format( sitename, username, resumeid) logger.error(message) else: ResumeRedisCli.hincrby(importkey, 'failure') message = u'简历解析失败,磁盘文件<{}>不存在'.format(sitename, username, resumeid) logger.error(message) except BaseException as e: dtc. async ('zpb.service.resumeservice.ParseResume', checkstatservice, **data), logger.error(u'简历解析服务异常,message:{}'.format(e)) finally: dtc. async (checkstatservice, taskid)
#_dbManager = self.__DbManagerDict[dbname] _dbManager = TQDbManager(dbname) self.__DbManagerDict[dbname] = _dbManager print u"%s reconnect database(%s) success!" % ( datetime.datetime.now(), dbname) ret = True except Exception, e: logger.error(u'callReConn error:' + dbname + ' cause:' + str(e)) traceback.print_exc() print u"%s reconnect database(%s) failed!" % ( datetime.datetime.now(), dbname) finally: return ret else: logger.error(u'未约定的数据库连接名称(%s)' % dbname) return ret @classmethod def testConn(self, dbname): conn = None cur = None res = False msg = "" try: conn = self.getConn(dbname) cur = conn.cursor() cur.execute("select 1") res = cur.fetchall() res = True except Exception, e:
def _markLocalImportFailed(imp, log_msg, reason):
    """Log a failed local import and record *reason* on the history row."""
    # Unicode format string: a byte-string template cannot format the
    # non-ASCII unicode arguments on Python 2.
    logger.error(u'{}失败,{}'.format(log_msg, reason))
    imp.fail_num = 1
    imp.proc_status = 2
    imp.fail_reason = reason


def ParseLocalResume(companyid, taskid, importid):
    """Parse a locally imported resume (file or pasted text) and store it.

    :param companyid: company id of the task (the row's own ``company_id``
        is what gets stored)
    :param taskid: task id (not used by the body)
    :param importid: id of the ``ImpLocalFile`` row to process

    Does nothing when the row does not exist or the history record cannot be
    created.  Otherwise the row and its ``ImpHistory`` record are updated
    with the outcome (``proc_status`` 20 / success counters, or failure
    counters plus the reason) and saved.
    """
    row = ImpLocalFile.queryByImportId(importid)
    if not row:
        return
    sitename = SiteConfig.getSiteNameById(row.from_site_id)
    logger.info(u'开始解析<{}>的本地简历'.format(sitename))
    row.proc_status = 10
    imp = ImpHistory.new(row.company_id, row.from_site_id, row.import_id,
                         row.input_type)
    if row.input_type == 1:
        imp.src_memo = row.user_file_name
    if not imp.save():
        return

    # Prefix used in log and failure messages.
    log_msg = u''
    try:
        if row.input_type == 1:
            log_msg = u'简历文件<{}>解析'.format(row.user_file_name)
            content = base64.b64encode(row.file_content)
            ext = os.path.splitext(row.user_file_name)[-1]
            ret = _doResumeParseByFile(content, ext)
        else:
            log_msg = u'简历文本解析'
            content = row.input_content
            ret = _doResumeParseByString(content)

        if not ret:
            _markLocalImportFailed(imp, log_msg, u'解析结果为空')
        else:
            js = json.loads(ret)
            if not (js['Type'] > 0 and js['Name']):
                _markLocalImportFailed(imp, log_msg, u'简历内容为空')
            else:
                js['companyid'] = row.company_id
                js['siteid'] = row.from_site_id
                if js['WebSiteResumeID']:
                    js['websiteresumeid'] = js['WebSiteResumeID']
                else:
                    js['websiteresumeid'] = 'Local{0}'.format(row.import_id)
                js['jobid'] = ''
                js['source'] = 0
                js['apply_job_id'] = row.apply_job_id
                js['apply_time'] = datetime2str(datetime.today())
                (res, message, new) = AssembelResumeByJson(js)
                if not res:
                    _markLocalImportFailed(imp, log_msg, message)
                else:
                    row.resume_code = message
                    row.proc_status = 20
                    # Save the per-resume history detail record.
                    if ImpHistoryResume.newAndSave(imp.history_id,
                                                   row.company_id, message):
                        imp.succ_num = 1
                        imp.proc_status = 1
                        logger.info(u'{}成功'.format(log_msg))
                    else:
                        _markLocalImportFailed(imp, log_msg, u'数据存储失败')
    except BaseException as e:
        logger.error(u'{}异常,原因:{}'.format(log_msg, e))
        imp.fail_num = 1
        imp.proc_status = 2
        imp.fail_reason = u'内部服务错误!'

    # Persist the outcome on both the import row and the history record.
    row.save()
    imp.end_time = datetime.today()
    imp.save()
def Do101(companyid, siteid, syncid, taskid, **kwargs):
    """Publish (or update) a job on a recruiting site and record the outcome.

    :param companyid: company owning the bound account
    :param siteid: recruiting-site id
    :param syncid: id of the ``DBTask`` row to update
    :param taskid: task id handed to the handler factory
    :param kwargs: must contain ``jobid``, the job to publish

    The job's ``JobSyncDistribute`` record, a ``JobMemo`` entry and the task
    row are all updated with the result.  A ``BaseError`` raised anywhere is
    recorded on the task as a failure; the task row is always saved.
    """
    task = DBTask.queryWithId(syncid)
    try:
        jobid = kwargs.pop('jobid', None)
        if not jobid:
            raise InvalidParamError(companyid, siteid, u'未指定发布职位编号')
        handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
        # The account has been unbound.
        if handler.bind.is_valid != 'T':
            DBTask.newSchedule(companyid, siteid, False)
            raise CompanyAccountUnBindError(companyid, siteid, u'账号已解除绑定')
        # The account did not pass verification.
        if handler.bind.check_status == 10:
            raise CompanyAccountInvalidError(companyid, siteid, u'账号登录失败')
        # step 1: locate the sync record and add or modify the position,
        # depending on whether the site already assigned it a job code.
        dist = JobSyncDistribute.queryByJobIdAndCompanyIdWithSiteId(
            jobid, companyid, siteid)
        if not dist:
            raise JobNotDistributeError(companyid, siteid, u'未找到职位同步记录')
        try:
            if not dist.third_job_code:
                handler.position_add(jobid)
            else:
                handler.position_modify(jobid, dist.third_job_code,
                                        dist.last_sync_time)
            # step 2: write the result to the sync record.
            if HANDLE_STATUS.SUCCESS == handler.status:
                # Not every handler exposes the site's job code.
                if hasattr(handler, 'thirdjobcode'):
                    dist.third_job_code = handler.thirdjobcode
                dist.sync_succ_num += 1
                dist.sync_status = 20
                dist.error_message = ''
                dist.last_sync_time = datetime.today()
                # Disabled follow-up calls, kept for reference:
                # BaseJob.updateSyncTimeByJobId(jobid)
                # dtc.async('zpb.service.handleservice.DoInfo', *(companyid, siteid, taskid))
            else:
                dist.sync_fail_num += 1
                dist.sync_status = 10
                dist.error_message = handler.message
            dist.save()
            # step 3: leave a memo on the job describing the outcome.
            jmm = JobMemo(jobid)
            if HANDLE_STATUS.SUCCESS == handler.status:
                jmm.memo_content = u'[{}]发布成功'.format(handler.name)
            else:
                jmm.memo_content = u'[{}]发布失败,{}'.format(
                    handler.name, handler.message)
            jmm.save()
            # step 4: update the task row (20 = done, 11 = AGAIN status,
            # 10 = failed).
            if HANDLE_STATUS.SUCCESS == handler.status:
                task.succ_num += 1
                task.log_info = handler.message
                task.sync_status = 20
                logger.info(handler.message)
            else:
                task.fail_num += 1
                task.log_info = handler.message
                if HANDLE_STATUS.AGAIN == handler.status:
                    task.sync_status = 11
                else:
                    task.sync_status = 10
                logger.error(handler.message)
        except BaseException as e:
            # Anything unexpected is wrapped so the outer handler records it.
            raise UnHandleRuntimeError(e)
    except BaseError as e:
        task.sync_status = 10
        task.fail_num += 1
        task.log_info = e.message
    task.save()