Example #1
0
        # NOTE(review): fragment of a larger loop body -- the enclosing loop and
        # the origins of post_one_data, post_id, page, collect and function are
        # outside this view.
        post_one_data['like_count'] = get_like_count(post_id)
        #post_one_data['likes'] = get_like_list(post_id)
        #post_one_data['shared'] = get_shared_list(post_id)  # share list (disabled)
        post_one_data['comments'] = get_comment_list(post_id)
        # extract keywords from the post's message text
        keywords = function.keyword_extract(post_one_data['message'])
        post_one_data['keywords'] = keywords
        # store the post document into MongoDB
        collect.insert_one(post_one_data)
        # store the keywords and their relations into PostgreSQL
        function.keywords_insert_pgdb(keywords)
        function.kw_relation_insert_pgdb(keywords)
        function.doc_insert_pgdb(post_one_data,18,3) # args: doc, source, big_source
        function.doc_join_kw_insert_pgdb(keywords,post_one_data['href'])
        function.daily_kw_insert_pgdb(keywords,post_one_data['date'],18) # args: keywords, date, source_fk
        function.fb_doc_relation_keyword(post_one_data['href'],page['id']) # link this fan-page post to its fan page

    # Update previously-crawled posts: re-fetch each one from the Graph API
    # to refresh its share/like/comment data.
    for post_id in crawled_post_ids:
        res = requests.get('https://graph.facebook.com/v2.3/%s?access_token=%s'%(post_id,token))
        post = json.loads(res.text)
        href = 'https://www.facebook.com/'+post_id # canonical post URL
        try:
            share_count = post['shares']['count'] # share count
        except Exception as e:
            # 'shares' is absent from the API response when the post has no shares
            share_count = 0
            print '沒有分享數',post_id
        like_count = get_like_count(post_id)
        comments = get_comment_list(post_id)
        # update MongoDB -- NOTE(review): loop body continues past this fragment
Example #2
0
        # NOTE(review): fragment -- the opening of this dict literal (and the
        # bindings for date, title, content, href, etc.) are above this view.
        'date': date,
        'title': title,
        'content': content,
        'href': href,
        'share_count': share_count,
        'like_count': like_count,
        'comments': comments,
        'keywords': keywords
    }
    # store the assembled article document into MongoDB
    collect.insert_one(doc)
    # store the keywords and their relations into PostgreSQL
    function.keywords_insert_pgdb(keywords)
    function.kw_relation_insert_pgdb(keywords)
    function.doc_insert_pgdb(doc, 68, 2)  # args: doc, source, big_source
    function.doc_join_kw_insert_pgdb(keywords, href)
    function.daily_kw_insert_pgdb(keywords, date, 68)  # args: keywords, date, source_fk
    print '%d/%d' % (ind1, t1)  # progress: current/total
    ind1 += 1

ind2 = 1  # progress counter for the new-links pass
t2 = len(crawled_new_links)  # total number of new links to crawl
for link in crawled_new_links:
    try:
        res = requests.get(link)
    except Exception as e:
        # best-effort crawl: report and skip links that fail to download
        print e
        continue
    soup = BeautifulSoup(res.text)
    href = link
    # update like counts of already-crawled articles
    # NOTE(review): this request is cut off at the end of the fragment
    res = requests.get(