Пример #1
0
def search_info(session,
                keyword="",
                start_time="",
                end_time="",
                num=1,
                location=0):
    """Fetch Weibo search result pages one by one and process each of them.

    For every page index in ``range(START_PAGE, START_PAGE + TOTAL_PAGE)``
    the search URL is built, a random delay is applied, the page is
    downloaded through ``session`` and handed to ``get_page_info``.

    Args:
        session: Object whose ``get(url)`` returns a response exposing
            ``.text`` (presumably a ``requests.Session`` -- confirm).
        keyword: Search term, inserted into the URL as-is (no escaping).
        start_time: Lower bound placed in the ``timescope=custom:`` filter.
        end_time: Upper bound placed in the ``timescope=custom:`` filter.
        num: Running value threaded through ``get_page_info``.
        location: When not equal to 0, ``&haslink=1`` is added to the URL;
            the value is also forwarded to ``get_page_info``.

    Returns:
        The latest ``num`` returned by ``get_page_info``, or the current
        ``num`` as soon as ``out_page`` reports a falsy result.
    """
    link_filter = "&haslink=1" if location != 0 else ""

    for page_no in range(START_PAGE, START_PAGE + TOTAL_PAGE):
        url = ''.join((
            'http://s.weibo.com/weibo/', keyword,
            '&scope=ori', link_filter,
            '&timescope=custom:', start_time, ':', end_time,
            '&page=', str(page_no),
            '&rd=newTips',
        ))

        # Random pause of 10-30 units before each request
        # (presumably seconds; depends on wait_time).
        wait_time(random.randint(10, 30))

        # Coerce the body to unicode, then hand UTF-8 bytes to the saver.
        raw_page = (u'' + session.get(url).text).encode('utf-8')
        page_content = save_catch_page(raw_page)

        # A falsy out_page() result ends the crawl early.
        if not out_page(page_content):
            return num

        num = get_page_info(page_content, session, location, num)

    return num
Пример #2
0
def search_info_by_id(session,
                      keyword="",
                      start_time="",
                      end_time="",
                      num=1,
                      location=0):
    """Crawl Weibo search result pages and store the posts found by id.

    For every page index in ``range(START_PAGE, START_PAGE + TOTAL_PAGE)``
    the search URL is built, the function sleeps for a random 10-30
    seconds, downloads the page through ``session``, extracts ids with
    ``get_id_list``, resolves them with ``get_weibo_by_ids`` and passes the
    result to ``save_data_by_db``; the returned status goes to ``lg_info``.

    Args:
        session: Object whose ``get(url)`` returns a response exposing
            ``.text`` (presumably a ``requests.Session`` -- confirm).
        keyword: Search term, inserted into the URL as-is (no escaping).
        start_time: Lower bound placed in the ``timescope=custom:`` filter.
        end_time: Upper bound placed in the ``timescope=custom:`` filter.
        num: Returned to the caller unchanged; this function never
            modifies it.
        location: When not equal to 0, ``&haslink=1`` is added to the URL.

    Returns:
        The ``num`` argument, either after all pages were processed or as
        soon as ``out_page`` reports a falsy result.
    """
    # Imported locally so the block does not depend on a module-level
    # ``import time`` being present in the file.
    import time

    haslink = "&haslink=1" if location != 0 else ""

    for i in range(START_PAGE, START_PAGE + TOTAL_PAGE):
        url = ('http://s.weibo.com/weibo/' + keyword + '&scope=ori' + haslink
               + '&timescope=custom:' + start_time + ':' + end_time
               + '&page=' + str(i) + '&rd=newTips')

        # Sleep in-process instead of spawning a shell running ``sleep``:
        # portable (no external binary needed) and no subprocess per page.
        time.sleep(random.randint(10, 30))

        # Coerce the body to unicode, then hand UTF-8 bytes to the saver.
        get_text = (u'' + session.get(url).text).encode('utf-8')
        content_text = save_catch_page(get_text)

        # A falsy out_page() result ends the crawl early.
        if not out_page(content_text):
            return num

        id_list = get_id_list(content_text, session)
        info_list = get_weibo_by_ids(id_list, session)
        status = save_data_by_db(info_list)
        lg_info(status)

    return num
Пример #3
0
def search_info(session, keyword="", start_time="", end_time="", num=1, location=0):
    """Iterate over the configured Weibo search pages and process each one.

    Pages ``START_PAGE`` .. ``START_PAGE + TOTAL_PAGE - 1`` are requested in
    order. Before each request the function waits via ``wait_time`` for a
    random value between 10 and 30. Each downloaded page is passed through
    ``save_catch_page`` and then ``get_page_info``.

    Args:
        session: Object whose ``get(url)`` returns a response exposing
            ``.text`` (presumably a ``requests.Session`` -- confirm).
        keyword: Search term, concatenated into the URL without escaping.
        start_time: First part of the ``timescope=custom:`` URL filter.
        end_time: Second part of the ``timescope=custom:`` URL filter.
        num: Running value passed to and replaced by ``get_page_info``.
        location: A value other than 0 adds ``&haslink=1`` to the URL and
            is forwarded to ``get_page_info``.

    Returns:
        ``num`` as last returned by ``get_page_info``; returned early when
        ``out_page`` gives a falsy result for a page.
    """
    if location != 0:
        link_flag = "&haslink=1"
    else:
        link_flag = ""

    for page_index in range(START_PAGE, START_PAGE + TOTAL_PAGE):
        search_url = (
            'http://s.weibo.com/weibo/' + keyword
            + '&scope=ori' + link_flag
            + '&timescope=custom:' + start_time + ':' + end_time
            + '&page=' + str(page_index)
            + '&rd=newTips'
        )

        delay = random.randint(10, 30)
        wait_time(delay)

        response = session.get(search_url)
        # Force unicode first, then encode to UTF-8 bytes for the saver.
        encoded_body = (u'' + response.text).encode('utf-8')
        saved_content = save_catch_page(encoded_body)

        page_ok = out_page(saved_content)
        if not page_ok:
            # Stop at the first page that out_page() rejects.
            return num

        num = get_page_info(saved_content, session, location, num)

    return num
Пример #4
0
def search_info_by_id(session, keyword="", start_time="", end_time="", num=1, location=0):
    """Crawl Weibo search pages, look up the listed posts by id and save them.

    Pages ``START_PAGE`` .. ``START_PAGE + TOTAL_PAGE - 1`` are requested in
    order with a random 10-30 second pause before each request. For each
    page, ``get_id_list`` extracts ids, ``get_weibo_by_ids`` resolves them,
    ``save_data_by_db`` stores the result and ``lg_info`` receives the
    returned status.

    Args:
        session: Object whose ``get(url)`` returns a response exposing
            ``.text`` (presumably a ``requests.Session`` -- confirm).
        keyword: Search term, concatenated into the URL without escaping.
        start_time: First part of the ``timescope=custom:`` URL filter.
        end_time: Second part of the ``timescope=custom:`` URL filter.
        num: Passed through untouched; the function only returns it.
        location: A value other than 0 adds ``&haslink=1`` to the URL.

    Returns:
        The unmodified ``num`` argument, either once every page has been
        handled or as soon as ``out_page`` gives a falsy result.
    """
    # Local import keeps this function self-contained even if the module
    # does not import ``time`` at the top.
    import time

    haslink = "&haslink=1" if location != 0 else ""

    for i in range(START_PAGE, START_PAGE + TOTAL_PAGE):
        url = ('http://s.weibo.com/weibo/' + keyword + '&scope=ori' + haslink
               + '&timescope=custom:' + start_time + ':' + end_time
               + '&page=' + str(i) + '&rd=newTips')

        # Previously ``os.system('sleep N')``: that starts a shell for every
        # page and fails where no ``sleep`` executable exists. time.sleep
        # pauses for the same duration without leaving the process.
        sleep_time = random.randint(10, 30)
        time.sleep(sleep_time)

        # Force unicode first, then encode to UTF-8 bytes for the saver.
        get_text = (u'' + session.get(url).text).encode('utf-8')
        content_text = save_catch_page(get_text)

        if not out_page(content_text):
            # Stop at the first page that out_page() rejects.
            return num

        id_list = get_id_list(content_text, session)
        info_list = get_weibo_by_ids(id_list, session)
        status = save_data_by_db(info_list)
        lg_info(status)

    return num