def fetch_contact(search_data, resume_id, username, password, proxies=None):
    """Log in, search resumes by keywords and upload the match(es).

    Args:
        search_data: Search criteria, passed through to ``GetResume``.
        resume_id: Id of the wanted resume, passed through to ``GetResume``.
        username: Account name used for the login.
        password: Account password used for the login.
        proxies: Optional proxies passed to ``Login`` and ``GetResume``.

    Returns:
        * ``{"err_code": 101, "err_msg": <second value of login()>}`` when
          the login fails;
        * the result of ``upload`` for the first search item whose
          ``err_code`` is 0;
        * the search item itself as soon as one has an ``err_code`` other
          than 0 or 7002;
        * ``{"err_code": 7002, ...}`` listing the uploaded resume ids when
          only 7002 items were seen;
        * ``None`` when the search yields nothing.
    """
    logger.info("start fetch contact with search_data: %s, \nresume_id: %s" % (search_data, resume_id))
    user_agent = nautil.user_agent()
    logger.info("登录中......")
    is_login, session = Login(username, password, user_agent, proxies=proxies).login()
    if not is_login:
        return {"err_code": 101, "err_msg": session}

    searcher = GetResume(session, resume_id, search_data, user_agent, proxies=proxies)
    ids = []
    for resume in searcher.get_resume_by_keywords():
        code = resume["err_code"]
        if code not in (0, 7002):
            # Any other code aborts the run; ids collected so far are dropped.
            return resume
        # Serialise only what is really uploaded (error items are returned
        # untouched above).
        payload = json.dumps(resume["err_msg"], ensure_ascii=False)
        uploaded = upload(payload, "yifeng", get_contact=True, logger_in=logger)
        if code == 0:
            return uploaded
        # err_code 7002: keep collecting, the ids are reported together below.
        ids.append(uploaded["resume_id"])

    if ids:
        resume_ids = " ".join(ids)
        return {"err_code": 7002, "err_msg": "找到了%s个简历, ids: %s" % (len(ids), resume_ids)}
    # NOTE: a lookup through GetResume(...).get_resume_by_id() used to sit here
    # behind ``if False:``; it never ran and its result was never used.
    return None
def fetch_contact_impl(search_data, resume_id, user_name, passwd, proxies=None, logger_name=None):
    """Log in to jianlika, fetch one resume by id and upload it.

    Args:
        search_data: Unused by the active code path (it was only consumed by
            the disabled keyword fallback, see the note below).
        resume_id: Id handed to ``__get_reusme_by_id``.
        user_name: Account name used for the login.
        passwd: Account password used for the login.
        proxies: Optional proxies passed to the login and to the lookup.
        logger_name: Accepted for signature compatibility; not used here.

    Returns:
        The second value returned by ``login()`` when the login fails,
        the lookup result when its ``err_code`` is not 0, otherwise the
        result of ``upload``.
    """
    logger.info("登录中.....")
    is_login, session = LoginJianLiKa(user_name, passwd, proxies=proxies).login()
    if not is_login:
        return session

    resume = __get_reusme_by_id(session, resume_id, proxies=proxies)
    if resume["err_code"] != 0:
        # NOTE: a backup lookup by keywords plus company name
        # (__get_resume_by_keywords(session, search_data, resume_id,
        # proxies=proxies)) was hard-disabled with ``if False:``, so the
        # failed by-id result is what the caller gets.
        # NOTE(review): the original layout was lost; this assumes the
        # trailing ``else: return resume`` was reachable - confirm.
        return resume
    return upload(resume["err_msg"], "jianlika", get_contact=True, logger_in=logger)
def fetch_contact_impl(search_data, resume_id, username, password, user_agent, proxies=None):
    """Log in to lie8, fetch one resume by id and upload it.

    Args:
        search_data: Search criteria, passed through to ``GetResume``.
        resume_id: Id of the wanted resume, passed through to ``GetResume``.
        username: Account name used for the login.
        password: Account password used for the login.
        user_agent: User-Agent handed to ``Login`` and ``GetResume``.
        proxies: Optional proxies passed to ``Login`` and ``GetResume``.

    Returns:
        ``{"err_code": 101, "err_msg": <second value of login()>}`` when the
        login fails, the result of ``upload`` when the lookup has
        ``err_code`` 0, otherwise the lookup result itself.
    """
    logger.info("登录中......")
    is_login, session = Login(username, password, user_agent, proxies=proxies).login()
    if not is_login:
        return {"err_code": 101, "err_msg": session}

    # NOTE: the alternative get_resume_by_keywords() lookup was disabled with
    # ``if False:`` and never ran; only the by-id lookup is used.
    resume = GetResume(session, resume_id, search_data, user_agent, proxies=proxies).get_resume_by_id()
    if resume["err_code"] != 0:
        # Previously this path fell off the end and returned None, hiding the
        # error from the caller; hand the failed result back like the sibling
        # implementations do.
        return resume
    return upload(resume["err_msg"], "lie8", get_contact=True, logger_in=logger)
def __fetch_contact(session, resume_id, proxies):
    """Fetch the original resume file from fenjianli and upload it.

    Posts to ``/search/getDetail.htm`` for ``resume_id``, reads
    ``originalFilePath`` from the JSON answer, downloads that path and hands
    the raw bytes to ``upload``.

    Args:
        session: Session object providing ``post`` and ``get``.
        resume_id: Resume id as ``str``/``unicode``; must parse as an integer.
        proxies: Proxies passed to both requests.

    Returns:
        The result of ``upload``.

    Raises:
        AssertionError: if ``resume_id`` has the wrong type, a response
            status is not 200, or the JSON lacks ``originalFilePath``.
        ValueError: if ``resume_id`` is not an integer string or the detail
            answer is not valid JSON.
    """
    assert isinstance(resume_id, (str, unicode))
    encrypt_resume_id = base64.b64encode(str(int(resume_id)))
    detail_page = "http://www.fenjianli.com/search/detail.htm?ids=%s" % encrypt_resume_id
    user_agent = nautil.user_agent()

    # "X-Requested-With" used to be listed twice in this literal.
    search_headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "User-Agent": user_agent,
        "Host": "www.fenjianli.com",
        "Origin": "http://www.fenjianli.com",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "X-Requested-With": "XMLHttpRequest",
        "Referer": detail_page,
    }
    logger.info('fetching resume detail >> %s' % detail_page)
    r = session.post(
        'http://www.fenjianli.com/search/getDetail.htm',
        headers=search_headers,
        proxies=proxies,
        data={
            'id': resume_id,
            '_random': random.random(),
        })
    assert r.status_code == 200, r.status_code
    data = json.loads(r.text)
    assert 'originalFilePath' in data
    logger.info('fetching path %s' % data['originalFilePath'])

    raw_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        "User-Agent": user_agent,
        'Upgrade-Insecure-Requests': '1',
    }
    r = session.get(data['originalFilePath'], headers=raw_headers, proxies=proxies)
    assert r.status_code == 200, '%s\n%s' % (r.status_code, r.content)
    return upload(r.content, 'fjl', get_contact=True, fjl_id=resume_id)
def fetch_contact_impl(search_data, resume_id, user_name, passwd, proxies=None, logger_name=None):
    """Log in to zhaopin, search for a resume and upload what is found.

    Args:
        search_data: Search criteria for ``__get_resumes_by_keywords``.
        resume_id: Id of the wanted resume.
        user_name: Account name used for the login.
        passwd: Account password used for the login.
        proxies: Optional proxies passed to ``login``.
        logger_name: When truthy, the module-level ``logger`` is rebound to
            ``logging.getLogger(logger_name)``.

    Returns:
        * the second value returned by ``login()`` when the login fails;
        * the search context when its ``err_code`` is 101;
        * for ``err_code`` 2: ``{"err_code": 7002, ...}`` listing the ids of
          every resume that was uploaded;
        * otherwise the result of ``upload`` for the first resume yielded by
          ``__get_resume_url`` if its ``err_code`` is 0, that first item
          itself if not, or ``None`` if nothing is yielded.
    """
    global logger
    if logger_name:
        logger = logging.getLogger(logger_name)

    result, session = login(user_name, passwd, proxies=proxies, logger=logger)
    if not result:
        return session

    context = __get_resumes_by_keywords(session, search_data, resume_id)
    code = context["err_code"]
    page = context["err_msg"]

    def _upload_first_resume():
        # Only the first yielded item is ever looked at: both outcomes return.
        for resume in __get_resume_url(session, page, resume_id):
            if resume["err_code"] == 0:
                return upload(resume["err_msg"], "zhaopin", get_contact=True, logger_in=logger)
            return resume
        return None

    if code == 101:
        logger.error("__get_resumes_by_keywords return None")
        return context

    # The codes below were written as the legacy octal literals 001 / 002,
    # which equal 1 / 2 but are a syntax error on Python 3.
    if code == 1:
        logger.info("简历ID为智联老ID,但是只搜索到一个简历,下载中.....")
        return _upload_first_resume()

    if code == 2:
        resume_total_num = pq(page).find("div.rd-resumelist-span").find("span").text()
        logger.info("简历ID为智联老ID,搜索到简历%s封,下载中....." % resume_total_num)
        ids = []
        for resume in __get_resume_url(session, page, resume_id, flag_7002=True):
            if resume["err_code"] == 0:
                res = upload(resume["err_msg"], "zhaopin", get_contact=True, logger_in=logger)
                ids.append(res["resume_id"])
        resume_ids = " ".join(ids)
        return {
            "err_code": 7002,
            "err_msg": "找到了%s个简历, ids: %s" % (len(ids), resume_ids)
        }

    logger.info("搜索简历%s成功....." % resume_id)
    return _upload_first_resume()
def fetch_contact_impl(args, user_name, passwd, proxies=None, logger_name=None):
    """Log in to highpin (zhuopin), unlock a resume's contact info and upload it.

    Args:
        args: Search arguments handed to ``search_resume``.
        user_name: Account name used for the login.
        passwd: Account password used for the login.
        proxies: Optional proxies passed to the login and the requests.
        logger_name: When truthy, the module-level ``logger`` is rebound to
            ``_logging.getLogger(logger_name)``.

    Returns:
        * the second value returned by ``login()`` when the login fails;
        * the search result / job-id result when its ``err_code`` is not 0;
        * the result of ``upload`` when the contact info is already visible
          or after the download request was sent;
        * ``{"err_code": 20019, ...}`` when the download request fails;
        * ``None`` when the page offers no download button.
    """
    global logger
    if logger_name:
        logger = _logging.getLogger(logger_name)
    _timeout = 30

    _result, _session = login(user_name, passwd, proxies=proxies)
    if not _result:
        return _session

    result, url = search_resume(_session, args)
    if result["err_code"] != 0:
        return result

    page = result["err_msg"]
    mobile = pq(page).find("#mobile").text()
    email = pq(page).find("#email").text()
    # '**********' is the value compared against to detect hidden contact info.
    if mobile != '**********' and email != '**********':
        logger.info("联系方式已存在,开始上传简历.....")
        return upload(page, "zhuopin", get_contact=True, logger_in=logger)

    job_id = __get_jobid(_session, proxies=proxies)
    if job_id["err_code"] != 0:
        return job_id

    # NOTE(review): the original indentation was lost; everything up to the
    # final upload is assumed to depend on the download button being present
    # (mirrors the 51job implementation) - confirm.
    if "collectresumedownloadbtn" not in page:
        return None

    logger.info("获取联系方式.....")
    post_headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Connection": "keep-alive",
        "Origin": "http://h.highpin.cn",
        "Referer": url,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
    }
    seekerUser_id = re.search(r"seekerUserID=(\d+)&resumeID", url).group(1)
    resumeid = re.search(r"resumeID=(\d+)&", url).group(1)
    post_data = {
        "seekerUserID": seekerUser_id,
        "resumeID": resumeid,
        "jobID": job_id["err_msg"],
    }
    try:
        resume_response = _session.post(
            "http://h.highpin.cn/ResumeManage/DownLoadResume",
            data=post_data,
            headers=post_headers,
            timeout=_timeout,
            proxies=proxies)
        assert resume_response
        assert resume_response.status_code == 200
    except Exception as e:
        # Was ``"...%s...%s" % (re)``: it formatted the ``re`` module into a
        # two-placeholder string and raised TypeError inside this handler, so
        # the 20019 error below was never returned.
        logger.warning("获取简历ID:%s, 联系方式失败:\n%s", resumeid, e)
        return {"err_code": 20019, "err_msg": "获取简历联系方式失败!"}

    resume_response.encoding = "utf-8"
    if "简历下载已成功,您的职位信息已同时发给该候选人" not in resume_response.text:
        # The placeholders used to be logged verbatim because no arguments
        # were supplied. Processing deliberately continues, as before.
        logger.warning("获取简历ID%s\n失败,\n%s", resumeid, resume_response.text)

    time.sleep(random.uniform(1, 3))
    resume = download_resume(_session, url, proxies=proxies)
    logger.info("获取联系方式成功,上传简历.....")
    return upload(resume["err_msg"], "zhuopin", get_contact=True, logger_in=logger)
def __fetch_contact(session, resume_id, user_name, user_password, proxies=None):
    """Buy the contact info of a ganji resume (if needed) and upload the resume.

    Re-logs in when the VIP page shows the login form, requests the download
    page for ``resume_id``, confirms the purchase when the resume has not
    been downloaded yet, then fetches the resume page and uploads it.
    Intermediate pages are dumped into ``session.temp_folder``, which is
    removed only when everything succeeded.

    Args:
        session: Session providing ``get``/``post`` and a ``temp_folder``
            attribute pointing to an existing directory.
        resume_id: Id of the resume on ganji.
        user_name: Account name used for the re-login.
        user_password: Account password used for the re-login.
        proxies: Ignored - it is overwritten with ``None`` below.

    Returns:
        The result of ``upload``.

    Raises:
        AssertionError: when a response is not OK / not 200 or an expected
            marker is missing from a page.
        Exception: after 10 failed login attempts, when the account has no
            downloads left, or when the resume was deleted.
    """
    user_agent = nautil.user_agent()
    # NOTE(review): the caller's proxies are discarded on purpose or by
    # leftover debugging - kept as in the original, confirm.
    proxies = None

    def _request(method, url, headers=None, data=None):
        # Never write the account password to the log.
        logged_data = data
        if data and "password" in data:
            logged_data = dict(data, password="***")
        logger.info('------\nRequesting %s On %s With Data:\n%s\n------' % (method, url, logged_data))
        time.sleep(random.uniform(1, 2))
        assert method in ('get', 'post')
        request_headers = {
            "User-Agent": user_agent,
        }
        request_headers.update(headers or {})
        if method == 'get':
            response = session.get(url, headers=request_headers, proxies=proxies, params=data)
        else:
            response = session.post(url, headers=request_headers, proxies=proxies, data=data)
        assert response
        assert response.status_code == 200
        response.encoding = 'utf-8'
        return response.text

    def _dump(file_name, text):
        # Keep a copy of the page for debugging; close the handle right away.
        with open(session.temp_folder + os.path.sep + file_name, 'w') as dump_file:
            dump_file.write(text)

    main_page = _request('get', 'http://www.ganji.com/vip')
    if '赶集用户登录' in main_page:
        logger.info('cookie fail, try login')
        logger.info('re-login')
        hash_value = re.search('''window.PAGE_CONFIG.__hash__ = '([^']*)';''', main_page)
        assert hash_value
        hash_value = hash_value.group(1)
        logger.info('login hash_value:%s' % hash_value)
        check_code_url = re.search(
            '''<img[^>]*id="login_img_checkcode"[^>]*src=['"]*([^'"]*)['"]*[^>]*>''',
            main_page)
        assert check_code_url
        check_code_url = check_code_url.group(1)
        logger.info('login check_code_url:%s' % check_code_url)
        time_stamp = str(int(time.time() * 1000))

        # At most 10 attempts; the check code is parsed again for each one.
        for _attempt in range(10):
            login_result = _request(
                'get',
                'https://passport.ganji.com/login.php',
                headers={
                    "Host": "passport.ganji.com",
                    "Referer": "https://passport.ganji.com/login.php?next=/",
                    "X-Requested-With": "XMLHttpRequest",
                    "Connection": "keep-alive"
                },
                data={
                    "callback": "jQuery1820229177205394230_%s" % time_stamp,
                    "username": user_name,
                    "password": user_password,
                    "checkCode": parse_check_code(session, check_code_url, 'ganji', proxies),
                    "setcookies": "14",
                    "second": "",
                    "parentfunc": "",
                    "redirect_in_iframe": "",
                    "next": '/',
                    "__hash__": hash_value,
                    "_": time_stamp
                })
            _dump('login_result.html', login_result)
            if 'error_msg' not in login_result:
                break
            logger.warning('login fail with response:\n%s' % login_result)
        else:
            raise Exception('try too many times to login')

    logger.info('trying to buy contact')
    message = _request(
        'get',
        'http://www.ganji.com/findjob/download_resume.php',
        headers={
            "Host": "www.ganji.com",
            "Referer": "http://www.ganji.com/jianli/%sx.htm" % resume_id,
            # Header values must be strings; current requests rejects ints.
            "Upgrade-Insecure-Requests": "1",
        },
        data={
            "source": "detail",
            "resume_type": "0",
            "findjob_puid": resume_id,
            "job_postion": "",
            "callback": "show_contact",
            "is_batch_view_resume": 0
        })
    _dump('message.html', message)

    if '您已下载过该简历' not in message:
        if '简历下载数不足' in message:
            raise Exception('The Ganji Account Can Not Afford this Resumes')
        if '此帖子已删除' in message:
            raise Exception('The Ganji Resume Deleted')
        assert '确认查看' in message
        buy_url = 'http://www.ganji.com/findjob/download_resume.php?source=detail&resume_type=0&findjob_puid=%s&job_postion=&callback=show_contact&is_batch_view_resume=0' % resume_id
        tel_message = _request(
            'post',
            buy_url,
            headers={
                "Host": "www.ganji.com",
                "Origin": "http://www.ganji.com",
                "Referer": buy_url,
                "Upgrade-Insecure-Requests": "1",
            },
            data={"one_key_download_setting": 1})
        assert 'tel-code' in tel_message

    logger.info('buy contact done, try upload resume')
    resume = _request('get', 'http://www.ganji.com/jianli/%sx.htm' % resume_id)
    shutil.rmtree(session.temp_folder)
    return upload(resume, 'ganji', get_contact=True)
def fetch_contact_impl(resume_id, user_name, passwd, proxies=None, logger_name=None):
    """Log in to 51job ehire, download a resume's contact info and upload it.

    Args:
        resume_id: Id of the wanted resume (sent as ``userId``).
        user_name: Account name used for the login.
        passwd: Account password used for the login.
        proxies: Optional proxies passed to the login and the requests.
        logger_name: When truthy, the module-level ``logger`` is rebound to
            ``logging.getLogger(logger_name)``.

    Returns:
        * the second value returned by ``login()`` when the login fails;
        * the failing helper result when ``__fet_contanct`` or
          ``__get_resume_page`` reports an ``err_code`` other than 0;
        * the result of ``upload`` when the contact info is already shown or
          after the download request succeeded;
        * an error dict with ``err_code`` 101, 20019, 20020 or 20022 for the
          failure cases handled below.

    The temporary folder stored on ``session.temp_folder`` is removed only on
    the paths that end in an upload.
    """
    global logger
    if logger_name:
        logger = logging.getLogger(logger_name)
    request_timeout = 30
    user_agent = nautil.user_agent()

    result, session = login(user_name, passwd, proxies=proxies)
    if not result:
        return session

    session.temp_folder = os.path.join(tempfile.gettempdir(), "naren", str(random.randint(1, 10000)))
    if not os.path.isdir(session.temp_folder):
        os.makedirs(session.temp_folder)

    result = __fet_contanct(session, resume_id, user_agent, proxies=proxies)
    if result["err_code"] != 0:
        return result

    url = "http://ehire.51job.com/%s" % result["err_msg"]
    resume_page_result = __get_resume_page(session, url, proxies=proxies)
    if resume_page_result["err_code"] != 0:
        return resume_page_result
    resume_page_text = resume_page_result["err_msg"]

    tel_mail = pq(resume_page_text).find(".infr").text()
    labels_present = u"电 话:" in resume_page_text and u"E-mail:" in resume_page_text
    # Contact info counts as visible when both labels are on the page or the
    # ".infr" text carries no "*".
    if labels_present or "*" not in tel_mail:
        logger.info("简历联系方式已存在")
        shutil.rmtree(session.temp_folder)
        return upload(resume_page_text, "j51", get_contact=True, logger_in=logger)

    is_download = pq(resume_page_text).find(".btn_down[id=UndownloadLink]").attr("onclick")
    if not is_download:
        logger.warning("当前账号没有下载权限,获取简历页失败")
        return {"err_code": 101, "err_msg": "当前账号没有下载权限!"}

    if not (u"点击查看联系方式!" in resume_page_text and u"简历信息" in resume_page_text):
        return {"err_code": 20020, "err_msg": "抱歉,没有搜到您想找的简历!"}

    post_data = {
        "doType": "SearchToCompanyHr",
        "userId": resume_id,
        "strWhere": "",
    }
    post_headers = {
        "Accept": "application/xml, text/xml, */*",
        "Accept-Encoding": "gzip,deflate",
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "ehire.51job.com",
        "Origin": "http://ehire.51job.com",
        "Referer": url,
        "X-Requested-With": "XMLHttpRequest",
    }
    logger.info("获取简历详情......")
    resume_text = session.post(
        url="http://ehire.51job.com/Ajax/Resume/GlobalDownload.aspx",
        headers=post_headers,
        data=post_data,
        timeout=request_timeout,
        proxies=proxies)
    if u"不属于以上地区" in resume_text.text:
        return {
            "err_code": 20022,
            "err_msg": "对不起,您暂时不能下载该份简历,原因是:您选中的简历中存在应聘者所在地超出合同范围的情况。请核实您的情况,若有疑问请与销售或客服人员联系。"
        }
    if resume_text.status_code != 200:
        return {"err_code": 20019, "err_msg": "获取简历失败"}
    resume_text.encoding = "utf-8"

    # Re-read the page, which should now show the contact info.
    resume_result = __get_resume_page(session, url, proxies=proxies)
    if resume_result["err_code"] != 0:
        # Same check as for the first page fetch: never upload an error
        # payload as if it were a resume.
        return resume_result
    logger.info('fetch resume_id %s done, try upload resume' % resume_id)
    shutil.rmtree(session.temp_folder)
    return upload(resume_result["err_msg"], "j51", get_contact=True, logger_in=logger)
def __fetch_contact(session, resume_id, user_name, user_password, proxies=None):
    """Download the contact info of a 58.com resume (if needed) and upload it.

    Re-logs in when the account page shows the login form, opens the
    resume-message page, triggers the download when the resume is not yet
    directly viewable, then fetches the resume page and uploads it.
    Intermediate pages are dumped into ``session.temp_folder``, which is
    removed only when everything succeeded.

    Args:
        session: Session providing ``get``/``post``, ``cookies`` and a
            ``temp_folder`` attribute pointing to an existing directory.
        resume_id: Id of the resume on 58.com.
        user_name: Account name used for the re-login.
        user_password: Account password used for the re-login.
        proxies: Ignored - it is overwritten with ``None`` below.

    Returns:
        The result of ``upload``.

    Raises:
        AssertionError: when a response is not OK / not 200, the login does
            not return a list, or an expected marker is missing from a page.
        Exception: when a business-licence certification is required or
            fewer than 5 downloads remain on the account.
    """
    user_agent = nautil.user_agent()
    # NOTE(review): the caller's proxies are discarded on purpose or by
    # leftover debugging - kept as in the original, confirm.
    proxies = None

    def _request(method, url, headers=None, data=None):
        logger.info('------\nRequesting %s On %s With Data:\n%s\n------' % (method, url, data))
        time.sleep(random.uniform(4, 15))
        assert method in ('get', 'post')
        # GET requests never carry data here.
        assert method == 'post' or not data
        request_headers = {
            "User-Agent": user_agent,
            "Origin": "http://jianli.58.com",
        }
        request_headers.update(headers or {})
        if method == 'get':
            response = session.get(url, headers=request_headers, proxies=proxies)
        else:
            response = session.post(url, headers=request_headers, proxies=proxies, data=data)
        assert response
        assert response.status_code == 200
        response.encoding = 'utf-8'
        return response.text

    def _dump(file_name, text):
        # Keep a copy of the page for debugging; close the handle right away.
        with open(session.temp_folder + os.path.sep + file_name, 'w') as dump_file:
            dump_file.write(text)

    main_page = _request('get', 'http://my.58.com/index')
    if '普通登录方式' in main_page:
        logger.info('cookie fail, try login')
        login_cookies = __login(user_name, user_password, user_agent, proxies=proxies)
        assert isinstance(login_cookies, list)
        login_cookie_jar = requests.cookies.RequestsCookieJar()
        for login_cookie in login_cookies:
            login_cookie_jar.set(
                login_cookie['name'],
                login_cookie['value'],
                domain=login_cookie['domain'],
                path=login_cookie['path'])
        session.cookies.update(login_cookie_jar)

    message = _request(
        'get',
        'http://jianli.58.com/resumemsg/?resumeid=%s&rand_code=%s&f=' % (resume_id, random.random()))
    if '您好,此求职者只允许在58同城认证营业执照的企业查看和下载' in message:
        raise Exception('Need Certification of Business Licence')

    if '您可直接查看本简历' not in message:
        # Same pattern text as the former ``ur"""..."""`` literal, spelled so
        # that it also parses on Python 3.
        remain = re.search(
            u"""您目前共有 <span class='f-f1a'>(\\d+)</span> 份简历可下载""",
            message)
        assert remain and remain.group(1).isdigit(), 'Unexpected Message \n%s' % message
        remain = int(remain.group(1))
        if remain < 5:
            raise Exception(
                'The 58 Accoun Remains Only %s Resumes To Download' % remain)
        logger.info("获取联系方式.....")
        tel = _request(
            'get',
            'http://jianli.58.com/ajax/resumemsg/?operate=userdown&rid=%s' % resume_id,
            headers={
                "Referer": "http://jianli.58.com/resumemsg/?resumeid=%s&rand_code=%s&f=" % (resume_id, random.random())
            })
        _dump('tel.html', tel)
        if '您可直接查看本简历' not in tel:
            assert re.search(r'>([\d ]*)</span', tel), 'TEL NOT FOUND in html:\n%s' % tel

    logger.info('fetch done, try upload resume')
    resume = _request('get', 'http://jianli.58.com/resume/%s/' % resume_id)
    _dump('resume.html', resume)
    shutil.rmtree(session.temp_folder)
    return upload(resume, 'x58', get_contact=True)