Пример #1
0
        # Store the newly crawled post in pgdb. The `function` helpers are
        # defined outside this excerpt; the notes below only restate what is
        # passed to them.
        function.keywords_insert_pgdb(keywords)
        function.kw_relation_insert_pgdb(keywords)
        function.doc_insert_pgdb(post_one_data,18,3) # args: doc, source, big_source
        function.doc_join_kw_insert_pgdb(keywords,post_one_data['href'])
        function.daily_kw_insert_pgdb(keywords,post_one_data['date'],18) # args: keywords, date, source_fk
        function.fb_doc_relation_keyword(post_one_data['href'],page['id']) # associate this fan-page post with its fan page

    # Update posts that were already crawled: re-fetch each one from the
    # Graph API and overwrite its share/like/comment data in both stores.
    for post_id in crawled_post_ids:
        res = requests.get('https://graph.facebook.com/v2.3/%s?access_token=%s'%(post_id,token))
        post = json.loads(res.text)
        href = 'https://www.facebook.com/'+post_id # post URL, used as the lookup key below
        try:
            share_count = post['shares']['count'] # share count
        except Exception as e:
            # Any failure to read the count falls back to 0; the printed
            # message means "no share count" and is followed by the post id.
            # NOTE(review): `e` is unused and `Exception` also hides failures
            # other than a missing key -- consider narrowing.
            share_count = 0
            print '沒有分享數',post_id
        like_count = get_like_count(post_id)
        comments = get_comment_list(post_id)
        # Update the MongoDB document whose 'href' matches this post.
        collect.update_one({'href':href},{'$set':{'share_count':share_count,'like_count':like_count,'comments':comments}})
        # Update the same post in pgdb.
        function.doc_update_pgdb(document_link=href,document_like=like_count,document_share=share_count,fb_comments=comments)
    # Summary: "number of newly crawled articles" / "number of updated articles".
    print '新爬的文章數:',len(nocrawled_post_ids)
    print '更新的文章數:',len(crawled_post_ids)
# Close the cursor and then the connection (both created outside this
# excerpt; presumably the pgdb handles -- confirm), then report completion.
cur.close()
conn.close()
print 'success'

Пример #2
0
    # Tail of the per-article insert loop (its header is outside this
    # excerpt): store the keyword relations, the document and its keyword
    # links in pgdb through the `function` helpers.
    function.kw_relation_insert_pgdb(keywords)
    function.doc_insert_pgdb(doc,47,2) # args: doc, source, big_source
    function.doc_join_kw_insert_pgdb(keywords,href)
    function.daily_kw_insert_pgdb(keywords,date,47) # args: keywords, date, source_fk
    # Progress indicator "<current>/<total>", then advance the counter.
    print '%d/%d'%(ind1,t1)
    ind1 += 1

ind2 = 1
t2 = len(crawled_new_links)
print '更新中...'
for link in crawled_new_links:
    #更新已抓過的文章的按讚數
    res = requests.get('http://api.facebook.com/restserver.php?method=links.getstats&format=json&urls=%s'%link)
    data = json.loads(res.text)[0]
    like_count = data['like_count']
    share_count = data['share_count']
    comment_count = data['comment_count']
    #更新 mongodb
    collect.update_one({'href':link},{'$set':{'share_count':share_count,'like_count':like_count,'comment_count':comment_count}})
    #更新 doc in pgdb
    function.doc_update_pgdb(document_link=link,document_like=like_count,document_share=share_count,comment_count=comment_count)

    print '%d/%d'%(ind2,t2)
    ind2 += 1
    
# Final report: completion marker, then the "newly crawled" and "updated"
# article counts, and finally release the pgdb resources via the helper.
print 'success'
print '新爬的文章數:',len(nocrawled_new_links)
print '更新的文章數:',len(crawled_new_links)
function.close_pgdb()