def search_info(session, keyword="", start_time="", end_time="", num=1, location=0):
    """Fetch Weibo search result pages for ``keyword`` and process each one.

    Pages ``START_PAGE`` through ``START_PAGE + TOTAL_PAGE - 1`` are requested
    in order. Before each request ``wait_time`` is called with a random
    integer in [10, 30] (presumably a delay in seconds -- confirm against
    ``wait_time``). The response text is UTF-8 encoded, passed through
    ``save_catch_page`` and checked with ``out_page``; a falsy ``out_page``
    result stops the crawl early.

    NOTE(review): this function is defined a second time later in this file;
    the later definition is the one bound at import time.

    Args:
        session: Object whose ``get(url)`` returns a response exposing
            ``.text`` (presumably a logged-in ``requests.Session`` -- confirm
            against the caller).
        keyword: Search keyword, inserted into the URL as given (it is not
            URL-escaped here).
        start_time: Start of the custom time scope, inserted verbatim.
        end_time: End of the custom time scope, inserted verbatim.
        num: Running value threaded through ``get_page_info``.
        location: When non-zero, ``&haslink=1`` is added to the query. It is
            also forwarded to ``get_page_info``.

    Returns:
        The value from the last ``get_page_info`` call, or the incoming
        ``num`` when no page was processed.
    """
    haslink = "&haslink=1" if location != 0 else ""

    # Everything up to the page number is loop-invariant. The time filter
    # parameter is ``timescope``; the leading ``&times`` had been mangled into
    # a multiplication sign by an HTML-entity decode, which silently dropped
    # the time filter from the request.
    url_prefix = ('http://s.weibo.com/weibo/' + keyword + '&scope=ori' + haslink
                  + '&timescope=custom:' + start_time + ':' + end_time
                  + '&page=')

    for page in range(START_PAGE, START_PAGE + TOTAL_PAGE):
        url = url_prefix + str(page) + '&rd=newTips'

        wait_time(random.randint(10, 30))

        page_bytes = session.get(url).text.encode('utf-8')
        content_text = save_catch_page(page_bytes)

        # A falsy out_page() result ends the crawl with the current counter.
        if not out_page(content_text):
            return num

        num = get_page_info(content_text, session, location, num)

    return num
def search_info_by_id(session, keyword="", start_time="", end_time="", num=1, location=0):
    """Fetch Weibo search result pages and store the posts found on each page.

    Pages ``START_PAGE`` through ``START_PAGE + TOTAL_PAGE - 1`` are requested
    in order, sleeping a random 10-30 seconds before each request. For every
    page the response text is UTF-8 encoded, passed through
    ``save_catch_page`` and checked with ``out_page``; a falsy ``out_page``
    result stops the crawl early. Otherwise the page is handed to
    ``get_id_list``, the ids to ``get_weibo_by_ids``, the result to
    ``save_data_by_db``, and that call's return value to ``lg_info``.

    NOTE(review): this function is defined a second time later in this file;
    the later definition is the one bound at import time.

    Args:
        session: Object whose ``get(url)`` returns a response exposing
            ``.text`` (presumably a logged-in ``requests.Session`` -- confirm
            against the caller). Also forwarded to ``get_id_list`` and
            ``get_weibo_by_ids``.
        keyword: Search keyword, inserted into the URL as given (it is not
            URL-escaped here).
        start_time: Start of the custom time scope, inserted verbatim.
        end_time: End of the custom time scope, inserted verbatim.
        num: Returned unchanged; this function never modifies it.
        location: When non-zero, ``&haslink=1`` is added to the query.

    Returns:
        ``num`` exactly as passed in.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    import time

    haslink = "&haslink=1" if location != 0 else ""

    # Everything up to the page number is loop-invariant. The time filter
    # parameter is ``timescope``; the leading ``&times`` had been mangled into
    # a multiplication sign by an HTML-entity decode, which silently dropped
    # the time filter from the request.
    url_prefix = ('http://s.weibo.com/weibo/' + keyword + '&scope=ori' + haslink
                  + '&timescope=custom:' + start_time + ':' + end_time
                  + '&page=')

    for page in range(START_PAGE, START_PAGE + TOTAL_PAGE):
        url = url_prefix + str(page) + '&rd=newTips'

        # Sleep in-process instead of shelling out to ``sleep``: no subprocess
        # is spawned and it also works where no ``sleep`` binary exists.
        time.sleep(random.randint(10, 30))

        page_bytes = session.get(url).text.encode('utf-8')
        content_text = save_catch_page(page_bytes)

        # A falsy out_page() result ends the crawl.
        if not out_page(content_text):
            return num

        id_list = get_id_list(content_text, session)
        info_list = get_weibo_by_ids(id_list, session)
        status = save_data_by_db(info_list)
        lg_info(status)

    return num
def search_info(session, keyword="", start_time="", end_time="", num=1, location=0):
    """Walk the configured range of Weibo search pages for ``keyword``.

    For each page number in ``range(START_PAGE, START_PAGE + TOTAL_PAGE)``
    this calls ``wait_time`` with a random integer in [10, 30] (presumably a
    delay in seconds -- confirm against ``wait_time``), downloads the page
    through ``session``, UTF-8 encodes the text and passes it to
    ``save_catch_page``. If ``out_page`` then returns a falsy value the walk
    ends immediately; otherwise ``get_page_info`` processes the page and
    yields the updated counter.

    NOTE(review): an earlier definition of this same function exists in this
    file; being the later one, this definition is the one that takes effect.

    Args:
        session: Object whose ``get(url)`` returns a response exposing
            ``.text`` (presumably a logged-in ``requests.Session`` -- confirm
            against the caller).
        keyword: Search keyword, concatenated into the URL without escaping.
        start_time: Start of the custom time scope, concatenated verbatim.
        end_time: End of the custom time scope, concatenated verbatim.
        num: Counter passed to, and replaced by the result of,
            ``get_page_info``.
        location: Non-zero adds ``&haslink=1`` to the query; the value is
            also forwarded to ``get_page_info``.

    Returns:
        The counter after the last processed page, or the incoming ``num``
        if no page was processed.
    """
    haslink = "&haslink=1" if location != 0 else ""

    # Build the invariant part of the URL once. ``&timescope`` is restored
    # here: an HTML-entity decode had turned ``&times`` into a multiplication
    # sign, so the time filter was never actually sent as a parameter.
    url_prefix = ('http://s.weibo.com/weibo/' + keyword + '&scope=ori' + haslink
                  + '&timescope=custom:' + start_time + ':' + end_time
                  + '&page=')

    for page in range(START_PAGE, START_PAGE + TOTAL_PAGE):
        url = url_prefix + str(page) + '&rd=newTips'

        wait_time(random.randint(10, 30))

        page_bytes = session.get(url).text.encode('utf-8')
        content_text = save_catch_page(page_bytes)

        # Stop as soon as out_page() reports a falsy result for this page.
        if not out_page(content_text):
            return num

        num = get_page_info(content_text, session, location, num)

    return num
def search_info_by_id(session, keyword="", start_time="", end_time="", num=1, location=0):
    """Walk the configured Weibo search pages and persist the posts by id.

    For each page number in ``range(START_PAGE, START_PAGE + TOTAL_PAGE)``
    this sleeps a random 10-30 seconds, downloads the page through
    ``session``, UTF-8 encodes the text and passes it to ``save_catch_page``.
    If ``out_page`` then returns a falsy value the walk ends immediately.
    Otherwise the chain ``get_id_list`` -> ``get_weibo_by_ids`` ->
    ``save_data_by_db`` runs for the page and the final return value is
    passed to ``lg_info``.

    NOTE(review): an earlier definition of this same function exists in this
    file; being the later one, this definition is the one that takes effect.

    Args:
        session: Object whose ``get(url)`` returns a response exposing
            ``.text`` (presumably a logged-in ``requests.Session`` -- confirm
            against the caller). Also forwarded to ``get_id_list`` and
            ``get_weibo_by_ids``.
        keyword: Search keyword, concatenated into the URL without escaping.
        start_time: Start of the custom time scope, concatenated verbatim.
        end_time: End of the custom time scope, concatenated verbatim.
        num: Returned unchanged; this function never modifies it.
        location: Non-zero adds ``&haslink=1`` to the query.

    Returns:
        ``num`` exactly as passed in.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level imports being changed.
    import time

    haslink = "&haslink=1" if location != 0 else ""

    # Build the invariant part of the URL once. ``&timescope`` is restored
    # here: an HTML-entity decode had turned ``&times`` into a multiplication
    # sign, so the time filter was never actually sent as a parameter.
    url_prefix = ('http://s.weibo.com/weibo/' + keyword + '&scope=ori' + haslink
                  + '&timescope=custom:' + start_time + ':' + end_time
                  + '&page=')

    for page in range(START_PAGE, START_PAGE + TOTAL_PAGE):
        url = url_prefix + str(page) + '&rd=newTips'

        # time.sleep replaces os.system('sleep N'): same blocking delay, but
        # no shell is spawned and no external ``sleep`` binary is required.
        time.sleep(random.randint(10, 30))

        page_bytes = session.get(url).text.encode('utf-8')
        content_text = save_catch_page(page_bytes)

        # Stop as soon as out_page() reports a falsy result for this page.
        if not out_page(content_text):
            return num

        id_list = get_id_list(content_text, session)
        info_list = get_weibo_by_ids(id_list, session)
        status = save_data_by_db(info_list)
        lg_info(status)

    return num