示例#1
0
文件: views.py 项目: huxiaoqian/case
def get_propagate_peak_news(topic, start_ts, end_ts):
    """Build the news-volume series of a topic and mark its peaks.

    The range from ``start_ts`` to ``end_ts`` is walked in steps of the
    module-level ``During``. For every step, the ``dcount`` values returned
    by ``ReadPropagateNews`` are summed over all message types in
    ``mtype_kv``.

    Args:
        topic: topic passed through to ``ReadPropagateNews``.
        start_ts: timestamp of the first step.
        end_ts: timestamp bounding the last step.

    Returns:
        ``{}`` when the range yields no step at all, otherwise a dict with
        ``'ts'`` (``ts2date`` of every step), ``'count_list'`` (the summed
        count of every step, as floats) and ``'peak'`` (a dict mapping the
        peak's ordinal to ``{'ts': ..., 'title': 'E<ordinal>'}``).
    """
    # Floor division keeps the step count an integer even where ``/`` is
    # true division, so it can always be handed to range().
    total_days = (end_ts - start_ts) // During

    counts = []
    dates = []
    for i in range(total_days + 1):
        ts = start_ts + During * i
        count = 0
        # Only the values of mtype_kv are needed as the message-type argument.
        for mtype in mtype_kv.values():
            dcount = ReadPropagateNews(topic, ts, During, mtype)
            if dcount:
                count += sum(dcount['dcount'].values())
        counts.append(float(count))
        dates.append(ts2date(ts))

    if not counts:
        return {}

    # detect_peaks yields indexes into ``counts``; map them back to dates.
    peaks = {}
    for idx, point_idx in enumerate(detect_peaks(counts)):
        peaks[idx] = {
            'ts': dates[point_idx],
            'title': 'E' + str(idx),
        }
    return {'ts': dates, 'count_list': counts, 'peak': peaks}
示例#2
0
def get_news_trend_pusher(topic, start_ts, end_ts, rank_method, news_skip, news_limit_count):
    results = []
    print "topic, start_ts, end_ts, rank_method:", topic.encode("utf-8"), ts2date(start_ts), ts2date(
        end_ts
    ), rank_method

    items = (
        db.session.query(TrendPusherNews)
        .filter(TrendPusherNews.topic == topic, TrendPusherNews.start_ts == start_ts, TrendPusherNews.end_ts == end_ts)
        .all()
    )
    if not items or items == []:
        return []

    for item in items:
        row = []
        news_id = item.news_id
        news_id = deal_with(news_id)
        timestamp = item.timestamp
        comments_count = item.comments_count
        news_info = json.loads(item.news_info)

        url = news_info["url"]
        summary = news_info["summary"]
        datetime = news_info["datetime"]
        source_from_name = news_info["source_from_name"]
        content168 = news_info["content168"]
        title = news_info["title"]
        # weight = news_info['weight']
        transmit_name = news_info["transmit_name"]
        # if len(transmit_name)==0:
        #    transmit_name = u'未知'
        same_news_num = news_info["same_news_num"]
        row = [
            news_id,
            url,
            summary,
            timestamp,
            datetime,
            source_from_name,
            content168,
            title,
            same_news_num,
            transmit_name,
            comments_count,
        ]
        results.append(row)

    if rank_method == "comments_count":
        sort_results = sorted(results, key=lambda x: x[10], reverse=True)  # 评论数逆序排列
    elif rank_method == "timestamp":
        sort_results = sorted(results, key=lambda x: x[3])  # 时间戳正序排列
    # elif rank_method=='weight':
    #    sort_results = sorted(results, key=lambda x:x[10], reverse=True) # 相关度逆序排序

    return sort_results[news_skip : news_limit_count + news_skip]
示例#3
0
文件: views.py 项目: huxiaoqian/case
def user_weibo():
    """Render the weibo list page, optionally filled with one user's weibos.

    Query-string arguments read from ``request.args``:
        query: topic keyword; defaults to ``default_topic``.
        time_range: time range, e.g. ``20130901-20130901``; defaults to
            ``default_timerange``.
        point_interval: time granularity, e.g. ``3600``. It is matched
            against the ``'en'`` field of the entries in ``pointIntervals``.
            A missing, non-numeric or unmatched value falls back to
            ``default_pointInterval``.
        uid: optional user id. When given, that user's weibos and profile
            are loaded.

    Returns:
        The rendered ``index/weibolist.html`` template.
    """
    # Page section ("yaosu") identifier handed to the template.
    yaosu = 'moodlens'

    topic = request.args.get('query', default_topic)
    time_range = request.args.get('time_range', default_timerange)

    # Always hand the template an interval entry: start from the default
    # and replace it only when the request names a known interval.
    point_interval = default_pointInterval
    requested_interval = request.args.get('point_interval', None)
    if requested_interval:
        try:
            wanted = int(requested_interval)
        except ValueError:
            pass  # not a number: keep the default interval
        else:
            for pi in pointIntervals:
                if pi['en'] == wanted:
                    point_interval = pi
                    break

    # Placeholders shown when no uid is given or the user lookup fails.
    weibos = []
    tar_name = u'昵称未知'
    tar_location = u'地域未知'
    tar_profile_image_url = '#'
    tar_followers_count = u'粉丝数未知'
    tar_friends_count = u'关注数未知'
    tar_user_url = '#'

    uid = request.args.get('uid', None)
    if uid:
        user_url = 'http://weibo.com/u/' + str(uid)

        count, results = xapian_search_weibo.search(
            query={'user': int(uid)},
            sort_by=['timestamp'],
            fields=['id', 'user', 'text', 'reposts_count', 'comments_count', 'geo', 'timestamp'])
        for r in results():
            r['weibo_url'] = 'http://weibo.com/'
            r['user_url'] = user_url
            r['created_at'] = ts2date(r['timestamp'])
            weibos.append(r)

        user_info = acquire_user_by_id(uid)
        if user_info:
            tar_name = user_info['name']
            tar_location = user_info['location']
            tar_profile_image_url = user_info['profile_image_url']
            tar_friends_count = user_info['friends_count']
            tar_followers_count = user_info['followers_count']
            tar_user_url = user_url

    return render_template(
        'index/weibolist.html', yaosu=yaosu, time_range=time_range,
        topic=topic, pointInterval=point_interval, pointIntervals=pointIntervals,
        gaishu_yaosus=gaishu_yaosus, deep_yaosus=deep_yaosus, tar_location=tar_location,
        tar_profile_image_url=tar_profile_image_url,
        statuses=weibos, tar_name=tar_name, tar_friends_count=tar_friends_count,
        tar_followers_count=tar_followers_count, tar_user_url=tar_user_url)
示例#4
0
def parseNews(news):
    """Index decoded news items by their processed id.

    Args:
        news: serialized news list, decoded with ``_json_loads``.

    Returns:
        A dict mapping ``deal_with(item['_id'])`` to ``[1, item]``, where the
        item's ``'timestamp'`` has been replaced by ``ts2date`` of the
        original value. Items that lack ``'_id'``, ``'timestamp'`` or
        ``'content168'``, or that fail conversion, are skipped. Returns ``{}``
        when the decoded input is empty.
    """
    news = _json_loads(news)
    if not news:
        return {}

    # Every news item carries the same fixed weight in the first slot.
    replies = 1

    news_dict = {}
    for weibo in news:
        # Best effort: a malformed item is dropped instead of failing the
        # whole batch. ``Exception`` (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        try:
            _id = deal_with(weibo['_id'])
            weibo['timestamp'] = ts2date(weibo['timestamp'])
            if 'content168' not in weibo:
                continue  # 'content168' is a required field
            news_dict[_id] = [replies, weibo]
        except Exception:
            continue

    return news_dict
示例#5
0
文件: weibos.py 项目: huxiaoqian/case
def parseWeibos(weibos):
  """Index decoded weibos by id and enrich them with author and link data.

  Args:
    weibos: serialized weibo list, decoded with ``_json_loads``.

  Returns:
    A dict mapping each weibo's ``'_id'`` to ``[reposts_count, weibo]``.
    Every kept weibo gains the keys ``'weibo_link'``, ``'name'``,
    ``'profile_image_url'`` and ``'date'``. Weibos that lack a required key
    or fail a lookup are skipped. Returns ``{}`` when the decoded input is
    empty.
  """
  weibos = _json_loads(weibos)
  if not weibos:
    return {}

  weibo_dict = {}
  for weibo in weibos:
    # Best effort: a malformed weibo is dropped instead of failing the whole
    # batch. ``Exception`` (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate.
    try:
      _id = weibo['_id']
      # getuserinfo returns the author's (username, profile_image_url).
      username, profileimage = getuserinfo(weibo['user'])
      reposts_count = weibo['reposts_count']
      weibo['weibo_link'] = weiboinfo2url(weibo['user'], _id)
      weibo['name'] = username
      weibo['profile_image_url'] = profileimage
      weibo['date'] = ts2date(weibo['timestamp'])
      weibo_dict[_id] = [reposts_count, weibo]
    except Exception:
      continue

  return weibo_dict
示例#6
0
def parseWeibos(weibos):
    """Index decoded weibos by id and enrich them with author and link data.

    Args:
        weibos: serialized weibo list, decoded with ``_json_loads``.

    Returns:
        A dict mapping each weibo's ``"_id"`` to ``[reposts_count, weibo]``.
        Every kept weibo gains the keys ``"weibo_link"``, ``"username"`` and
        ``"profile_image_url"``, and its ``"timestamp"`` is replaced by
        ``ts2date`` of the original value. Weibos that lack a required key or
        fail a lookup are skipped. Returns ``{}`` when the decoded input is
        empty.
    """
    weibos = _json_loads(weibos)
    if not weibos:
        return {}

    weibo_dict = {}
    for weibo in weibos:
        # Best effort: a malformed weibo is dropped instead of failing the
        # whole batch. ``Exception`` (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        try:
            _id = weibo["_id"]
            username, profileimage = getuserinfo(weibo["user"])
            reposts_count = weibo["reposts_count"]
            weibo["weibo_link"] = weiboinfo2url(weibo["user"], _id)
            weibo["username"] = username
            weibo["profile_image_url"] = profileimage
            weibo["timestamp"] = ts2date(weibo["timestamp"])
            weibo_dict[_id] = [reposts_count, weibo]
        except Exception:
            continue

    return weibo_dict
示例#7
0
def read_uid_weibos(topic, date, windowsize, uid):
    """List the weibos one user posted on a topic inside a time window.

    The window ends at ``datetime2ts(date)`` and spans ``windowsize`` times
    the module-level ``Day`` before that.

    Args:
        topic: topic used to select the xapian index.
        date: end date of the window, converted with ``datetime2ts``.
        windowsize: window length as a multiple of ``Day``.
        uid: user id used as the ``"user"`` query term.

    Returns:
        A list of rows sorted ascending by the ``date`` column (index 9):
        ``[wid, uid, name, location, friends_count, followers_count,
        created_at, statuses_count, profile_image_url, date, text, geo,
        source, reposts_count, comments_count, weibo_link]``.
        An empty list when the search has no hits.
    """
    window_end = datetime2ts(date)
    window_start = window_end - Day * windowsize
    searcher = getXapianWeiboByTopic(topic, window_start, window_end)

    count, results = searcher.search(query={"user": uid}, fields=weibo_fields_list)
    if count == 0:
        return []

    profile_keys = (
        "name",
        "location",
        "friends_count",
        "followers_count",
        "created_at",
        "statuses_count",
        "profile_image_url",
    )
    # Placeholder profile columns used when the author lookup finds nothing.
    unknown_profile = [u"未知"] * 6 + [u"no"]

    rows = []
    for weibo in results():
        wid = weibo["_id"]
        author = weibo["user"]

        profile = user_search.search_by_id(author, fields=user_fields_list)
        if profile:
            profile_columns = [profile[key] for key in profile_keys]
        else:
            profile_columns = list(unknown_profile)

        text = weibo["text"]
        geo = weibo["geo"]
        source = weibo["source"]
        posted_at = ts2date(weibo["timestamp"])
        reposts = weibo["reposts_count"]
        comments = weibo["comments_count"]
        link = weiboinfo2url(author, wid)
        # The domain is looked up but not part of the row; the call is kept
        # as in the original.
        uid2domain(author)

        rows.append(
            [wid, author]
            + profile_columns
            + [posted_at, text, geo, source, reposts, comments, link]
        )

    rows.sort(key=lambda row: row[9])
    return rows
示例#8
0
def c_weibo_by_ts(topic, date, windowsize, uid, network_type, cid, rank_method):
    """List the weibos posted by the members of one community of a topic graph.

    The community is read from the stored gexf graph of the topic through
    ``get_community_user(g, uid, cid)``. Weibos are then searched inside the
    window that ends at ``datetime2ts(date)`` and spans ``windowsize`` times
    ``Day``.

    Args:
        topic: topic name.
        date: end date of the window; also part of the graph file name.
        windowsize: window length as a multiple of ``Day``; also part of the
            graph file name.
        uid: user id handed to ``get_community_user``.
        network_type: ``'source_graph'`` or ``'direct_superior_graph'``;
            selects which stored graph file is read.
        cid: community id handed to ``get_community_user``.
        rank_method: ``'reposts_count'`` sorts by repost count descending;
            anything else sorts by timestamp ascending.

    Returns:
        A sorted list of rows
        ``[_id, name, location, friends_count, followers_count, created_at,
        statuses_count, profile_image_url, text, date, reposts_count, source,
        geo, comments_count, sentiment_name, weibo_link, uid, timestamp]``.
        ``(None, None, None)`` when the topic has no real topic id, and
        ``None`` when the search has no hits. Both shapes are kept as they
        were for existing callers.

    Raises:
        ValueError: if ``network_type`` is not one of the two supported
            values (previously this surfaced as an unbound-variable error).
    """
    real_topic_id = acquire_real_topic_id(topic, date, windowsize)
    if not real_topic_id:
        return None, None, None

    # The topic exists: locate the stored directed graph for this window.
    key_pre = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
    if network_type == 'source_graph':
        graph_suffix = '_gg_graph.gexf'
    elif network_type == 'direct_superior_graph':
        graph_suffix = '_ds_udg_graph.gexf'
    else:
        raise ValueError('unsupported network_type: %r' % (network_type,))
    g = nx.read_gexf(str(GRAPH_PATH) + key_pre + graph_suffix)

    # Users of the community that ``uid`` / ``cid`` designate in the graph.
    community_user_list = get_community_user(g, uid, cid)

    end_ts = datetime2ts(date)
    start_ts = end_ts - Day * windowsize
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)

    # One OR clause per community member.
    query_dict = {
        '$or': [{'user': int(member_uid)} for member_uid in community_user_list]
    }
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        return None

    community_info = []
    for weibo in weibo_results():
        author_uid = weibo['user']
        _id = weibo['_id']

        result = user_search.search_by_id(author_uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # Author not found: fill the profile columns with placeholders.
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'

        text = weibo['text']
        timestamp = weibo['timestamp']
        weibo_date = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment_name = emotions_kv[weibo['sentiment']]
        weibo_link = weiboinfo2url(author_uid, _id)

        community_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, weibo_date, reposts_count,
            source, geo, comments_count, sentiment_name, weibo_link,
            author_uid, timestamp,
        ])

    if rank_method == 'reposts_count':
        # Column 10 is reposts_count: most reposted first.
        return sorted(community_info, key=lambda row: row[10], reverse=True)
    # Column 17 is the raw timestamp: oldest first.
    return sorted(community_info, key=lambda row: row[17])
示例#9
0
def get_info(neighbor_list, topic, date, windowsize):
    """Collect weibos, top keywords and sentiment shares for a set of users.

    Weibos are searched inside the window that ends at ``datetime2ts(date)``
    and spans ``windowsize`` times ``Day``.

    Args:
        neighbor_list: iterable of user ids; each becomes one ``'user'``
            clause of an ``'$or'`` query.
        topic: topic used to select the xapian index.
        date: end date of the window, converted with ``datetime2ts``.
        windowsize: window length as a multiple of ``Day``.

    Returns:
        A 4-tuple ``(sort_neighbor_info, sort_top_keyword,
        new_sentiment_list, query_dict)``:

        * ``sort_neighbor_info``: rows
          ``[_id, name, location, friends_count, followers_count, created_at,
          statuses_count, profile_image_url, text, date, reposts_count,
          source, geo, comments_count, sentiment_name, weibo_link, uid]``
          sorted by ``reposts_count`` descending.
        * ``sort_top_keyword``: output of ``top_keywords`` sorted by its
          second element descending.
        * ``new_sentiment_list``: ``[sentiment_name, num, ratio]`` per
          sentiment, with ``ratio = num / count``.
        * ``query_dict``: the query that was executed.

        When the search has no hits, all four elements are ``None``. The
        empty case used to return only three values, which did not match the
        success shape.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)

    # One OR clause per neighbor.
    query_dict = {
        '$or': [{'user': int(neighbor_uid)} for neighbor_uid in neighbor_list]
    }
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        return None, None, None, None

    neighbor_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        author_uid = weibo['user']
        _id = weibo['_id']

        result = user_search.search_by_id(author_uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # Author not found: fill the profile columns with placeholders.
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'

        text = weibo['text']
        weibo_date = ts2date(weibo['timestamp'])
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(author_uid, _id)

        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        neighbor_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, weibo_date,
            reposts_count, source, geo, comments_count, sentiment_name,
            weibo_link, author_uid,
        ])

    # Column 10 is reposts_count: most reposted first.
    sort_neighbor_info = sorted(neighbor_info, key=lambda row: row[10], reverse=True)

    # Second search over the same query, requesting the raw mset for keyword
    # extraction.
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda pair: pair[1], reverse=True)

    new_sentiment_list = []
    for sentiment, num in sentiment_count.items():
        sentiment_ch = emotions_kv[int(sentiment)]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])

    return sort_neighbor_info, sort_top_keyword, new_sentiment_list, query_dict
示例#10
0
def community_result(community_user_list, topic, date, windowsize):
    """Collect weibos, top keywords and sentiment shares for a community.

    Weibos are searched inside the window that ends at ``datetime2ts(date)``
    and spans ``windowsize`` times ``Day``.

    Args:
        community_user_list: iterable of user ids; each becomes one
            ``"user"`` clause of an ``"$or"`` query.
        topic: topic used to select the xapian index.
        date: end date of the window, converted with ``datetime2ts``.
        windowsize: window length as a multiple of ``Day``.

    Returns:
        A 4-tuple ``(sort_community_info, sort_top_keyword,
        new_sentiment_list, query_dict)``:

        * ``sort_community_info``: rows
          ``[_id, name, location, friends_count, followers_count, created_at,
          statuses_count, profile_image_url, text, date, reposts_count,
          source, geo, comments_count, sentiment_name, weibo_link, domain]``
          sorted by ``reposts_count`` descending.
        * ``sort_top_keyword``: output of ``top_keywords`` sorted by its
          second element descending.
        * ``new_sentiment_list``: ``[sentiment_name, num, ratio]`` per
          sentiment, with ``ratio = num / count``.
        * ``query_dict``: the query that was executed.

        When the search has no hits, all four elements are ``None``. The
        empty case used to return only three values, which did not match the
        success shape.
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)

    # One OR clause per community member.
    query_dict = {
        "$or": [{"user": int(member_uid)} for member_uid in community_user_list]
    }
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        return None, None, None, None

    community_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        author_uid = weibo["user"]
        _id = weibo["_id"]

        result = user_search.search_by_id(author_uid, fields=user_fields_list)
        if result:
            name = result["name"]
            location = result["location"]
            friends_count = result["friends_count"]
            followers_count = result["followers_count"]
            created_at = result["created_at"]
            statuses_count = result["statuses_count"]
            profile_image_url = result["profile_image_url"]
        else:
            # Author not found: fill the profile columns with placeholders.
            name = u"未知"
            location = u"未知"
            friends_count = u"未知"
            followers_count = u"未知"
            created_at = u"未知"
            statuses_count = u"未知"
            profile_image_url = u"no"

        text = weibo["text"]
        weibo_date = ts2date(weibo["timestamp"])
        reposts_count = weibo["reposts_count"]
        source = weibo["source"]
        geo = weibo["geo"]
        comments_count = weibo["comments_count"]
        sentiment = weibo["sentiment"]
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(author_uid, _id)
        domain = uid2domain(author_uid)

        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        community_info.append(
            [
                _id,
                name,
                location,
                friends_count,
                followers_count,
                created_at,
                statuses_count,
                profile_image_url,
                text,
                weibo_date,
                reposts_count,
                source,
                geo,
                comments_count,
                sentiment_name,
                weibo_link,
                domain,
            ]
        )

    # Column 10 is reposts_count: most reposted first.
    sort_community_info = sorted(community_info, key=lambda row: row[10], reverse=True)

    # Second search over the same query, requesting the raw mset for keyword
    # extraction.
    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=["terms"]), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda pair: pair[1], reverse=True)

    new_sentiment_list = []
    for sentiment, num in sentiment_count.items():
        sentiment_ch = emotions_kv[int(sentiment)]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])

    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict