Exemplo n.º 1
0
 def __download_resume(self, id):
     """Download the detail payload of one resume and return its text.

     POSTs ``id`` to the ``getDetail.htm`` endpoint through ``self.session``
     using ``self.headers`` and ``self.proxies``.  The ``Referer`` header on
     ``self.headers`` is overwritten with a detail-page URL built from the
     base64-encoded ``id`` (plus ``self.params["keywords"]`` when present).

     The call sleeps for a random 10-60 seconds before the first request.

     :param id: resume identifier; sent as-is in the POST body and
         base64-encoded for the Referer URL.
     :returns: ``response.text`` of the first reply that is neither a login
         page, an "illegal operation" page, nor a single-list page.
     :raises Exception: ``"PROXY_FAIL!"`` once more than 5 fetch attempts
         have failed (the attempt counter is shared across the whole call
         and is never reset); ``"LOGIN_ACCOUNT_ERROR!"`` after more than 5
         re-logins; ``"ILLEGAL_OPERATION!"`` after more than 5 "illegal
         operation" replies; ``"IS_SINGLE_LIST!"`` when the reply contains
         ``is-single-list``.
     """
     import base64

     urls = 'http://www.fenjianli.com/search/getDetail.htm'
     _timeout = 30

     # NOTE(review): concatenating the b64encode() result with a str literal
     # relies on Python 2 string semantics - confirm before porting.
     encode_id = base64.b64encode(id)
     Referer = 'http://www.fenjianli.com/search/detail.htm?ids=' + encode_id
     kw = self.params.get("keywords", None)
     if kw:
         Referer += '&kw=%s' % kw
     self.headers["Referer"] = Referer
     logger.info('proxies %s of Referer %s' % (self.proxies, Referer))

     resume_param = {
         "id": id,
         "_random": random.uniform(0, 1)
     }
     logger.info('headers %s of download resume' % (self.headers))

     try_times = 0
     operation_times = 0
     login_times = 0
     time.sleep(random.uniform(10, 60))
     while True:
         # Inner loop: repeat the POST until it yields an HTTP 200 reply.
         while True:
             try_times += 1
             try:
                 logger.warning('fetching params %s with %s' % (resume_param, self.proxies))
                 response = self.session.post(urls, data=resume_param, headers=self.headers, timeout=_timeout, proxies=self.proxies)
                 # Explicit check instead of ``assert`` so that the status
                 # is still validated when Python runs with -O.
                 if response.status_code != 200:
                     raise Exception('unexpected HTTP status %s' % response.status_code)
                 response.encoding = 'utf-8'
                 break
             except Exception:
                 logger.warning(
                     'fetch params %s with %s fail:\n%s' % (resume_param, self.proxies, traceback.format_exc()))
                 if try_times > 5:
                     raise Exception("PROXY_FAIL!")
                 else:
                     time.sleep(30)

         # The reply is the login dialog: log in again and retry.
         if u'data-toggle="modal">登录</a>' in response.text and u'<h4 class="modal-title">用户登录</h4>' in response.text:
             self.session = contact.login(username, password, proxies=self.proxies)
             login_times += 1
             if login_times > 5:
                 raise Exception("LOGIN_ACCOUNT_ERROR!")
             continue

         # The reply contains the "illegal operation" marker: wait 5-10
         # minutes, then retry.
         if u"非法操作" in response.text:
             time.sleep(random.uniform(300, 600))
             operation_times += 1
             if operation_times > 5:
                 raise Exception("ILLEGAL_OPERATION!")
             continue

         if "is-single-list" in response.text:
             raise Exception("IS_SINGLE_LIST!")
         return response.text
Exemplo n.º 2
0
def lagou_search(params, dedup, proxies=None):
    """Log in through ``contact.login`` and run the search described by ``params``.

    :param params: search parameters; validated by ``__check_params``.
    :param dedup: passed through unchanged to ``spider``.
    :param proxies: optional proxy setting forwarded to the login, the
        URL-splicing step and ``spider``.
    :returns: whatever ``spider`` returns, or ``[]`` when ``__check_params``
        rejects ``params``.
    :raises AssertionError: if ``username`` or ``password`` is empty.
    """
    # An explicit check is used instead of ``assert username, password``:
    # that form only tests ``username``, uses the password as the failure
    # message (leaking it into logs), and disappears under ``python -O``.
    # AssertionError is kept as the exception type for existing callers.
    if not username or not password:
        raise AssertionError("username and password must both be configured")

    user_agent = nautil.user_agent()
    session = contact.login(username, password, user_agent, proxies)
    if not __check_params(params):
        return []

    param = __splice_search_urls(session,
                                 user_agent,
                                 params,
                                 proxies=proxies)
    return spider(session, param, user_agent, dedup, proxies=proxies)
Exemplo n.º 3
0
def fjl_search(params, dedup, proxies=None):
    """Log in through ``contact.login`` and collect resumes from the search page.

    A ``getResume`` helper is built for the ``search.htm`` endpoint and one
    of its three entry points is chosen from the keys present in ``params``:

    * no ``"scheme_flag"`` key: ``goto_resume_urls()``
    * ``"scheme_flag"`` present but neither ``"scheme"`` nor
      ``"scheme_index"``: ``goto_resume_urls_without_scheme()``
    * otherwise: ``goto_resume_urls_with_scheme(params)``

    :param params: search parameters; also passed to ``__splice_search_urls``.
    :param dedup: passed through unchanged to ``getResume``.
    :param proxies: optional proxy setting forwarded to the login and to
        ``getResume``.
    :returns: the value returned by the selected ``getResume`` method.
    :raises AssertionError: if ``username`` or ``password`` is empty.
    """
    # An explicit check is used instead of ``assert username, password``:
    # that form only tests ``username``, uses the password as the failure
    # message (leaking it into logs), and disappears under ``python -O``.
    # AssertionError is kept as the exception type for existing callers.
    if not username or not password:
        raise AssertionError("username and password must both be configured")

    session = contact.login(username, password, proxies=proxies)
    url = 'http://www.fenjianli.com/search/search.htm'
    search_params = __splice_search_urls(params)
    get_resume = getResume(session, url, search_params, dedup, proxies=proxies)

    if "scheme_flag" not in params:
        # Log text: "params has no scheme_flag".
        logger.info("params中没有shceme_flag.....")
        return get_resume.goto_resume_urls()

    if "scheme" not in params and "scheme_index" not in params:
        # Log text: "params has scheme_flag but no scheme / scheme_index".
        logger.info("params中包含shceme_flag, 但是没有scheme和scheme_index......")
        return get_resume.goto_resume_urls_without_scheme()

    # Log text: "params has scheme_flag as well as scheme and scheme_index".
    # NOTE(review): this branch is also taken when only one of the two keys
    # is present - confirm that is intended.
    logger.info("params中包含shceme_flag, 且包含scheme和scheme_index.....")
    return get_resume.goto_resume_urls_with_scheme(params)
Exemplo n.º 4
0
 def __search(self, param):
     """POST ``param`` to ``self.url`` and return the decoded JSON reply.

     The call sleeps for a random 10-60 seconds first, then makes up to 5
     rounds.  Each round repeats the POST until it yields an HTTP 200 reply
     and then inspects the body; a body that is not usable (illegal
     operation marker, login dialog, missing ``totalSize``, a
     ``'list': None`` marker, or invalid JSON) makes the round wait and
     move on to the next one.

     :param param: form data sent as the POST body.
     :returns: the object parsed from the JSON reply, or ``None`` when all
         5 rounds ended without a usable reply.
     :raises Exception: ``"PROXY_FAIL!"`` once more than 5 fetch attempts
         have failed (the attempt counter is shared by all rounds).
     """
     try_times = 0
     time.sleep(random.uniform(10, 60))
     for _round in range(5):
         # Repeat the POST until a truthy, HTTP 200 response is received.
         while True:
             try_times += 1
             try:
                 logger.warning('fetching %s with %s data:\n%s' % (self.url, self.proxies, param))
                 response = self.session.post(self.url, data=param, headers=self.headers, timeout=30, proxies=self.proxies)
                 # Explicit check instead of ``assert`` so that the reply is
                 # still validated when Python runs with -O.
                 if not response or response.status_code != 200:
                     raise Exception(
                         'unexpected response, status %s' % getattr(response, 'status_code', None))
             except Exception:
                 logger.warning('fetch %s with %s fail:\n%s' % (self.url, self.proxies, traceback.format_exc()))
                 if try_times > 5:
                     raise Exception("PROXY_FAIL!")
                 else:
                     time.sleep(30)
             else:
                 break

         # Body contains the "illegal operation" marker.
         if u"非法操作" in response.text:
             time.sleep(60)
             continue

         # Body is the login dialog: log in again before the next round.
         if u'data-toggle="modal">登录</a>' in response.text and u'<h4 class="modal-title">用户登录</h4>' in response.text:
             self.session = contact.login(username, password, proxies=self.proxies)
             time.sleep(30)
             continue

         # No exception is being handled in the two checks below, so no
         # traceback is appended to their log messages.
         if "totalSize" not in response.text:
             logger.warning("response with param %s , response_data: \n%s, without totalSize" % (param, response.text))
             time.sleep(random.uniform(300, 600))
             continue
         if u"""'list': None""" in response.text:
             logger.error(
                 "response \n%s with params \n%s error" % (response.text, param))
             time.sleep(random.uniform(300, 600))
             continue

         try:
             # ``response.text`` is already decoded text, so no ``encoding``
             # argument is passed (it was removed from json.loads in 3.9).
             response_results = json.loads(response.text)
         except Exception:
             logger.error('json parse fail:\n%s\n%s' % (response.text, traceback.format_exc()))
             time.sleep(30)
             continue
         return response_results

     # Every round ended without a usable reply.
     return None
Exemplo n.º 5
0
def x58_search(params, dedup, proxies=None):
    """Log in through ``contact.login`` and collect resume URLs for ``params``.

    :param params: search parameters; turned into list-page URLs by
        ``get_resume_list_urls``.
    :param dedup: passed through unchanged to ``get_resume_urls``.
    :param proxies: optional proxy setting forwarded to the login and to
        ``get_resume_urls``.
    :returns: whatever ``get_resume_urls`` returns.
    :raises AssertionError: if ``username`` is empty.
    """
    # Explicit check instead of ``assert`` so the guard is not stripped when
    # Python runs with -O; AssertionError is kept for existing callers.
    if not username:
        raise AssertionError("username must be configured")

    urls = get_resume_list_urls(params)
    user_agent = nautil.user_agent()
    session = contact.login(username, password, user_agent, proxies=proxies)
    return get_resume_urls(session, urls, user_agent, dedup, proxies=proxies)