示例#1
0
     print 'error5', link
 # Fetch the news article's like count (the same response also supplies the
 # share count) from the Facebook links.getstats REST endpoint for `link`.
 res = requests.get(
     'http://api.facebook.com/restserver.php?method=links.getstats&format=json&urls=%s'
     % link)
 # Only the first element of the decoded JSON response is used.
 data = json.loads(res.text)[0]
 like_count = data['like_count']
 share_count = data['share_count']
 # Fetch the news article's comments, looked up by data['comments_fbid'].
 # Any failure is printed and the article is stored with an empty list.
 try:
     comments = function.get_fb_comments(data['comments_fbid'])
 except Exception as e:
     comments = []
     print '抓取fb_comments錯誤', e
 # Extract keywords from the article text
 keywords = function.keyword_extract(content)
 # Store into MongoDB
 # NOTE(review): date, title, content and href are set outside this
 # excerpt; `collect` is presumably a MongoDB collection -- confirm.
 doc = {
     'author': '壹電視新聞',
     'date': date,
     'title': title,
     'content': content,
     'href': href,
     'share_count': share_count,
     'like_count': like_count,
     'comments': comments,
     'keywords': keywords
 }
 collect.insert_one(doc)
 # Store into pgdb (presumably PostgreSQL -- TODO confirm)
 function.keywords_insert_pgdb(keywords)
示例#2
0
        # Reformat the post's creation timestamp: replace the 'T' separator
        # with a space and keep only the part before the first '+'.
        date = post['created_time'].replace('T',' ')
        date = date.split('+')[0]
        post_one_data['date'] = date #creation time (UTC+00)
        print post_one_data['date']
        # Read the share count; on any exception (presumably a post with no
        # 'shares' entry -- confirm) store 0 and print a "no share count"
        # message together with the post id.
        try:
            post_one_data['share_count'] = post['shares']['count'] #share count
        except Exception as e:
            post_one_data['share_count'] = 0
            print '沒有分享數',post_id

        post_one_data['like_count'] = get_like_count(post_id)
        #post_one_data['likes'] = get_like_list(post_id)
        #post_one_data['shared'] = get_shared_list(post_id) #list of users who shared
        post_one_data['comments'] = get_comment_list(post_id)
        # Extract keywords from the post text
        keywords = function.keyword_extract(post_one_data['message'])
        post_one_data['keywords'] = keywords
        # Store into MongoDB
        collect.insert_one(post_one_data)
        # Store into pgdb
        # NOTE(review): the literals 18 and 3 below are the source /
        # big_source arguments named in the inline comments; what those ids
        # refer to is not visible in this excerpt.
        function.keywords_insert_pgdb(keywords)
        function.kw_relation_insert_pgdb(keywords)
        function.doc_insert_pgdb(post_one_data,18,3) #doc,source,big_source
        function.doc_join_kw_insert_pgdb(keywords,post_one_data['href'])
        function.daily_kw_insert_pgdb(keywords,post_one_data['date'],18) #keywords,date,source_fk
        function.fb_doc_relation_keyword(post_one_data['href'],page['id']) #associate this fan page's post with the fan page itself

    # Update posts: for every id in crawled_post_ids, request the post again
    # from the Graph API (v2.3) and decode the JSON response.
    # NOTE(review): the remainder of this loop body may lie outside this
    # excerpt.
    for post_id in crawled_post_ids:
        res = requests.get('https://graph.facebook.com/v2.3/%s?access_token=%s'%(post_id,token))
        post = json.loads(res.text)
示例#3
0
        print resu[0][0], resu[0][1]
    print '-------------------------------------------------'
'''
for o in combi(a):
    print o
print len(combi(a))
print type(combi(a))
'''

# In[ ]:

import requests
import json
import function

# Print each keyword that function.keyword_extract returns for a sample
# headline, one per line.
for k in function.keyword_extract('蔡英文明訪日 日本李登輝之友會協辦晚宴'):
    print k
# Query the Google AJAX web-search endpoint for the single character '的',
# decode the JSON response and print the estimated result count found under
# responseData -> cursor.
res = requests.get(
    'https://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=的')
a = json.loads(res.text)
print a['responseData']['cursor']['estimatedResultCount']

# In[1]:

#兩文本相似度
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from copy import deepcopy
# 作業系統
import os