def test_parse_home_info(url, is_login, is_ajax, cookies, session):
    """Fetch a page and assert that the matching home parser returns data.

    :param url: url of the page to request
    :param is_login: when equal to 1 the page is requested through
        ``session``; otherwise it is requested with ``requests.get`` and
        the supplied ``cookies``
    :param is_ajax: truthy selects ``home.get_ajax_data`` as the parser,
        falsy selects ``home.get_data``
    :param cookies: cookies sent with the non-session request
    :param session: object whose ``get(url)`` result exposes ``.text``
    """
    if is_login == 1:
        response = session.get(url)
    else:
        response = requests.get(url, cookies=cookies)
    content = response.text

    # Both request paths run the same check; only the parser depends on
    # whether the page is an ajax response.
    parse = home.get_ajax_data if is_ajax else home.get_data
    assert len(parse(content)) > 0

    # Sleep for REQUEST_INTERNAL seconds once the check has passed.
    time.sleep(REQUEST_INTERNAL)
def test_crawl_first_home_page():
    """Request a fixed user's first home page and two ajax pages built from it.

    The home page must contain the ``['islogin']`` marker. Each ajax page
    must not contain 'Sina Visitor System' and must yield at least one
    item from ``get_ajax_data``. The test sleeps for REQUEST_INTERNAL
    seconds after every request whose checks pass.
    """
    from page_parse.home import get_ajax_data

    home_url = 'http://weibo.com/u/1800822823?is_ori=1&is_tag=0&profile_ftype=1&page=1'
    page_html = get_page(home_url, auth_level=1)
    assert "['islogin']" in page_html
    time.sleep(REQUEST_INTERNAL)

    # Millisecond timestamp used as the last field of the ajax url.
    cur_time = int(time.time() * 1000)
    # NOTE(review): the two urls differ only in the timestamp argument
    # (cur_time vs cur_time + 100) -- confirm that is intended.
    ajax_urls = [
        HOME_AJAX_URL.format('100505', 0, '100505', '1800822823', 1, 1, stamp)
        for stamp in (cur_time, cur_time + 100)
    ]

    for ajax_url in ajax_urls:
        page_html = get_page(ajax_url, auth_level=1, is_ajax=True)
        assert 'Sina Visitor System' not in page_html
        assert len(get_ajax_data(page_html)) > 0
        time.sleep(REQUEST_INTERNAL)
def crawl_ajax_page(url, auth_level):
    """
    Fetch one ajax page, parse it and hand the parsed items to
    ``WbDataOper.add_all``.

    :param url: user home ajax url
    :param auth_level: 1 stands for no login but need fake cookies, 2 stands for login
    :return: the fetched page text, or '' when nothing was parsed from it
    """
    page_text = get_page(url, auth_level, is_ajax=True)
    parsed_items = get_ajax_data(page_text)

    if parsed_items:
        WbDataOper.add_all(parsed_items)
        return page_text

    # Nothing parsed: signal it to the caller with an empty string.
    return ''
def crawl_ajax_page(url, auth_level):
    """
    Fetch one ajax page, parse it, and pass the leading items that are not
    older than the configured cutoff to ``WbDataOper.add_all``.

    Items are scanned in order; the list is cut at the first item whose
    ``create_time`` is earlier than ``get_time_after()``, and everything
    from that item onwards is dropped.

    :param url: user home ajax url
    :param auth_level: 1 stands for no login but need fake cookies, 2 stands for login
    :return: the fetched page text, or '' when nothing was parsed from it
    """
    page_text = get_page(url, auth_level, is_ajax=True)
    parsed_items = get_ajax_data(page_text)
    if not parsed_items:
        return ''

    cutoff = time.mktime(time.strptime(get_time_after(), '%Y-%m-%d %H:%M:%S'))

    kept_items = parsed_items
    for position, item in enumerate(parsed_items):
        # create_time carries minute precision only (no seconds field).
        created = time.mktime(time.strptime(item.create_time, '%Y-%m-%d %H:%M'))
        if created < cutoff:
            # First item older than the cutoff: keep only what precedes it.
            kept_items = parsed_items[:position]
            break

    WbDataOper.add_all(kept_items)
    return page_text
def crawl_ajax_page(url, auth_level):
    """
    Fetch one ajax page, parse it, and pass the items accepted by
    ``determine`` to ``WbDataOper.add_all``.

    :param url: user home ajax url
    :param auth_level: 1 stands for no login but need fake cookies, 2 stands for login
    :return: the fetched page text, or '' when nothing was parsed from it
    """
    page_text = get_page(url, auth_level, is_ajax=True)
    parsed_items = get_ajax_data(page_text)
    if not parsed_items:
        return ''

    cutoff_struct = time.strptime(get_time_after(), '%Y-%m-%d %H:%M:%S')
    cutoff = time.mktime(cutoff_struct)

    # Keep every item for which determine(item, cutoff) is truthy.
    # NOTE(review): presumably determine() compares the item's time with
    # the cutoff -- confirm against its definition.
    kept_items = []
    for item in parsed_items:
        if determine(item, cutoff):
            kept_items.append(item)

    WbDataOper.add_all(kept_items)
    return page_text