예제 #1
0
def read_trend_pusher(topic, date, windowsize, rank_method):
    """Load the TrendPusher rows of one topic window and rank them.

    Queries ``TrendPusher`` for rows whose ``topic``, ``date`` and
    ``windowsize`` equal the arguments and flattens every row into a list
    laid out as::

        [rank, uid, uname, location, domain_name, timestamp, text,
         profile_image_url, friends_count, followers_count,
         statuses_count, created_at, geo, source, weibo_link, _id,
         reposts_count]

    ``rank_method`` selects the ordering:

    * ``'reposts_count'``  -- query order, passed through ``deal_rank``
    * ``'timestamp'``      -- ascending on column 5, then ``deal_rank``
    * ``'friends_count'``  -- descending on column 8, then ``deal_rank``
    * ``'statuses_count'`` -- descending on column 10, then ``deal_rank``
    * any other value      -- query order, returned without ``deal_rank``
    """
    matched = db.session.query(TrendPusher).filter(
        TrendPusher.topic == topic,
        TrendPusher.date == date,
        TrendPusher.windowsize == windowsize,
    ).all()

    rows = []
    for record in matched or []:
        uid = record.uid
        posted_at = ts2date(record.timestamp)

        user = json.loads(record.user_info)
        uname = user['name']
        location = user['location']
        avatar = user['profile_image_url']
        if avatar == u'未知':
            avatar = 'no'
        # The u'未知' placeholder is swapped for -1 on the two counters
        # that can be used as sort keys below.
        friends = user['friends_count']
        if friends == u'未知':
            friends = -1
        followers = user['followers_count']
        statuses = user['statuses_count']
        if statuses == u'未知':
            statuses = -1
        created_at = user['created_at']

        post = json.loads(record.weibo_info)
        text = post['text']
        geo = post['geo']
        source = post['source']
        post_id = post['_id']
        reposts = post['reposts_count']
        link = weiboinfo2url(uid, post_id)
        domain_name = domain_dict[record.domain]

        rows.append([
            record.rank, uid, uname, location, domain_name, posted_at, text,
            avatar, friends, followers, statuses, created_at, geo, source,
            link, post_id, reposts,
        ])

    if rank_method == 'reposts_count':
        return deal_rank(rows)
    if rank_method == 'timestamp':
        return deal_rank(sorted(rows, key=lambda row: row[5]))
    if rank_method == 'friends_count':
        return deal_rank(sorted(rows, key=lambda row: row[8], reverse=True))
    if rank_method == 'statuses_count':
        return deal_rank(sorted(rows, key=lambda row: row[10], reverse=True))
    return rows
예제 #2
0
def read_trend_maker(topic, date, windowsize, rank_method):
    """Load the TrendMaker rows of one topic window and rank them.

    Queries ``TrendMaker`` for rows whose ``topic``, ``date`` and
    ``windowsize`` equal the arguments.  Every row becomes a list::

        [rank, uid, uname, location, domain_name, timestamp, text,
         profile_image_url, friends_count, followers_count,
         statuses_count, created_at, geo, source, weibo_link, _id,
         reposts_count, value, key_item]

    ``rank_method`` selects the ordering:

    * ``'content'``        -- query order, passed through ``deal_rank``
    * ``'timestamp'``      -- ascending on column 5, then ``deal_rank``
    * ``'friends_count'``  -- descending on column 8, then ``deal_rank``
    * ``'statuses_count'`` -- descending on column 10, then ``deal_rank``
    * ``'reposts_count'``  -- descending on column 16, then ``deal_rank``
    * any other value      -- query order, returned without ``deal_rank``
    """
    stored = db.session.query(TrendMaker).filter(
        TrendMaker.topic == topic,
        TrendMaker.date == date,
        TrendMaker.windowsize == windowsize).all()

    rows = []
    for entry in stored or []:
        uid = entry.uid
        when = ts2date(entry.timestamp)

        profile = json.loads(entry.user_info)
        uname = profile['name']
        location = profile['location']
        image_url = profile['profile_image_url']
        if image_url == u'未知':
            image_url = 'no'
        friends = profile['friends_count']
        if friends == u'未知':
            friends = -1
        followers = profile['followers_count']
        statuses = profile['statuses_count']
        if statuses == u'未知':
            statuses = -1
        created_at = profile['created_at']

        post = json.loads(entry.weibo_info)
        text = post['text']
        geo = post['geo']
        source = post['source']
        post_id = post['_id']
        reposts = post['reposts_count']
        link = weiboinfo2url(uid, post_id)
        domain_name = domain_dict[entry.domain]
        rank = entry.rank
        hit_count = entry.value  # number of keyword hits
        hit_keywords = json.loads(entry.key_item)  # the keywords that were hit

        rows.append([
            rank, uid, uname, location, domain_name, when, text, image_url,
            friends, followers, statuses, created_at, geo, source, link,
            post_id, reposts, hit_count, hit_keywords,
        ])

    if rank_method == 'content':
        return deal_rank(rows)

    # (rank_method value, row column used as sort key, descending?)
    orderings = (
        ('timestamp', 5, False),
        ('friends_count', 8, True),
        ('statuses_count', 10, True),
        ('reposts_count', 16, True),
    )
    for method_name, column, descending in orderings:
        if rank_method == method_name:
            ranked = sorted(rows, key=lambda row: row[column],
                            reverse=descending)
            return deal_rank(ranked)
    return rows
예제 #3
0
def read_uid_weibos(topic, date, windowsize, uid):
    """Return the weibos of one user inside a topic window, oldest first.

    The window ends at ``datetime2ts(date)`` and starts ``Day * windowsize``
    earlier.  The xapian handle for that window is searched with
    ``{'user': uid}``.  Every hit becomes a list::

        [wid, uid, name, location, friends_count, followers_count,
         created_at, statuses_count, profile_image_url, date, text, geo,
         source, reposts_count, comments_count, weibo_link]

    When ``user_search`` has no record for the author, the profile columns
    hold the placeholder ``u'未知'`` (``u'no'`` for the image url).  The
    list is sorted ascending on column 9 (the ``ts2date`` value).
    """
    window_end = datetime2ts(date)
    window_start = window_end - Day * windowsize
    searcher = getXapianWeiboByTopic(topic, window_start, window_end)

    hit_count, fetch_hits = searcher.search(query={'user': uid},
                                            fields=weibo_fields_list)
    if hit_count == 0:
        return []

    rows = []
    for weibo in fetch_hits():
        wid = weibo['_id']
        author_id = weibo['user']
        profile = user_search.search_by_id(author_id, fields=user_fields_list)
        if profile:
            name = profile['name']
            location = profile['location']
            friends_count = profile['friends_count']
            followers_count = profile['followers_count']
            created_at = profile['created_at']
            statuses_count = profile['statuses_count']
            profile_image_url = profile['profile_image_url']
        else:
            name = location = friends_count = u'未知'
            followers_count = created_at = statuses_count = u'未知'
            profile_image_url = u'no'

        text = weibo['text']
        geo = weibo['geo']
        source = weibo['source']
        posted_on = ts2date(weibo['timestamp'])
        reposts_count = weibo['reposts_count']
        comments_count = weibo['comments_count']
        weibo_link = weiboinfo2url(author_id, wid)
        # Return value is not used; the call is kept so behaviour is unchanged.
        uid2domain(author_id)

        rows.append([
            wid, author_id, name, location, friends_count, followers_count,
            created_at, statuses_count, profile_image_url, posted_on, text,
            geo, source, reposts_count, comments_count, weibo_link,
        ])

    rows.sort(key=lambda row: row[9])
    return rows
예제 #4
0
def parseWeibos(weibos):
    """Index decoded weibos by their ``_id``.

    ``weibos`` is decoded with ``_json_loads``.  A falsy decode result
    yields ``{}``.  Otherwise every entry gets a ``'weibo_link'`` key and is
    stored as ``{_id: [reposts_count, entry]}``.  An entry whose processing
    raises anything is skipped silently (best effort).
    """
    decoded = _json_loads(weibos)
    if not decoded:
        return {}

    indexed = {}
    for entry in decoded:
        try:
            wid = entry['_id']
            reposts = entry['reposts_count']
            entry['weibo_link'] = weiboinfo2url(entry['user'], wid)
            indexed[wid] = [reposts, entry]
        except:
            # Deliberately broad: any malformed entry is dropped.
            continue

    return indexed
예제 #5
0
파일: weibos.py 프로젝트: huxiaoqian/case
def parseWeibos(weibos):
    """Decode weibos, enrich each one and index them by ``_id``.

    ``weibos`` is decoded with ``_json_loads``; a falsy result yields ``{}``.
    Each entry is extended with ``'weibo_link'``, ``'name'``,
    ``'profile_image_url'`` (both taken from ``getuserinfo``) and ``'date'``
    (``ts2date`` of its ``'timestamp'``), then stored as
    ``{_id: [reposts_count, entry]}``.  Entries that raise while being
    processed are skipped silently.
    """
    decoded = _json_loads(weibos)
    if not decoded:
        return {}

    indexed = {}
    for entry in decoded:
        try:
            wid = entry['_id']
            # getuserinfo yields the user name and the profile image url
            user_name, image_url = getuserinfo(entry['user'])
            reposts = entry['reposts_count']
            entry['weibo_link'] = weiboinfo2url(entry['user'], wid)
            entry['name'] = user_name
            entry['profile_image_url'] = image_url
            entry['date'] = ts2date(entry['timestamp'])
            indexed[wid] = [reposts, entry]
        except:
            # Deliberately broad: any malformed entry is dropped.
            continue

    return indexed
예제 #6
0
파일: weibos.py 프로젝트: NeilWang6/case
def parseWeibos(weibos):
    """Decode weibos, enrich each one and index them by ``_id``.

    ``weibos`` is decoded with ``_json_loads``; a falsy result yields ``{}``.
    Each entry gains ``'weibo_link'``, ``'name'``, ``'profile_image_url'``
    and ``'date'`` and is stored as ``{_id: [reposts_count, entry]}``.
    Entries that raise while being enriched are skipped silently.
    """
    decoded = _json_loads(weibos)
    if not decoded:
        return {}

    def enrich(entry):
        # Returns (key, value) for the result dict; may raise on bad input.
        wid = entry['_id']
        user_name, image_url = getuserinfo(entry['user'])
        reposts = entry['reposts_count']
        entry['weibo_link'] = weiboinfo2url(entry['user'], wid)
        entry['name'] = user_name
        entry['profile_image_url'] = image_url
        entry['date'] = ts2date(entry['timestamp'])
        return wid, [reposts, entry]

    indexed = {}
    for entry in decoded:
        try:
            wid, payload = enrich(entry)
            indexed[wid] = payload
        except:
            # Deliberately broad: any malformed entry is dropped.
            continue

    return indexed
예제 #7
0
def parseWeibos(weibos):
    """Decode weibos, enrich each one and index them by ``_id``.

    ``weibos`` is decoded with ``_json_loads``; a falsy result yields ``{}``.
    Each entry gains ``"weibo_link"``, ``"username"`` and
    ``"profile_image_url"``, and its ``"timestamp"`` is overwritten with the
    ``ts2date`` value.  The result maps ``_id`` to
    ``[reposts_count, entry]``.  Entries that raise while being processed
    are skipped silently.
    """
    decoded = _json_loads(weibos)
    if not decoded:
        return {}

    by_id = {}
    for post in decoded:
        try:
            post_id = post["_id"]
            user_name, image_url = getuserinfo(post["user"])
            reposts = post["reposts_count"]
            post["weibo_link"] = weiboinfo2url(post["user"], post_id)
            post["username"] = user_name
            post["profile_image_url"] = image_url
            # The raw timestamp is replaced, not kept alongside.
            post["timestamp"] = ts2date(post["timestamp"])
            by_id[post_id] = [reposts, post]
        except:
            # Deliberately broad: any malformed entry is dropped.
            continue
    return by_id
예제 #8
0
def get_city_weibo(topic, start_ts, end_ts, unit=MinInterval, limit=TOP_WEIBOS_LIMIT):
    """Group the most-reposted stored weibos of a topic by province.

    ``CityWeibos`` rows are selected by ``topic``, ``range == unit`` and
    ``limit``.  When the requested span is shorter than ``unit`` only the
    row whose ``end`` equals ``end_ts`` rounded up to a multiple of ``unit``
    is read; otherwise all rows with ``lowbound < end <= upbound`` are read.
    The weibos of those rows are sorted by ``reposts_count`` (descending)
    and at most the first 1000 are processed: each one gets ``"username"``,
    ``"time"`` and ``"weibo_link"`` keys and is filed under its city when
    that city is in ``province_list``.

    Returns a dict mapping city name to a list of weibo dicts.
    """
    scored = []
    if end_ts - start_ts < unit:
        upbound = int(math.ceil(end_ts / (unit * 1.0)) * unit)
        bucket = db.session.query(CityWeibos).filter(
            CityWeibos.end == upbound,
            CityWeibos.topic == topic,
            CityWeibos.range == unit,
            CityWeibos.limit == limit,
        ).first()
        if bucket:
            scored.extend(
                (entry["reposts_count"], entry)
                for entry in _json_loads(bucket.weibos)
            )
    else:
        upbound = int(math.ceil(end_ts / (unit * 1.0)) * unit)
        lowbound = (start_ts / unit) * unit
        buckets = db.session.query(CityWeibos).filter(
            CityWeibos.end > lowbound,
            CityWeibos.end <= upbound,
            CityWeibos.topic == topic,
            CityWeibos.range == unit,
            CityWeibos.limit == limit,
        ).all()
        for bucket in buckets:
            for entry in _json_loads(bucket.weibos):
                try:
                    scored.append((entry["reposts_count"], entry))
                except:
                    # Entries without a usable reposts_count are dropped.
                    continue
    scored.sort(key=lambda pair: pair[0], reverse=True)

    city_dict = {}
    # Only the 1000 most-reposted weibos are considered.
    for _, weibo in scored[:1000]:
        author = acquire_user_by_id(weibo["user"])
        if author:
            weibo["username"] = author["name"]
        else:
            weibo["username"] = "******"
        weibo["time"] = ts2date(weibo["timestamp"])

        try:
            if len(weibo["geo"].split(".")) == 4:
                # Four dot-separated parts: handed to IP2city and the
                # stored geo is replaced by its result.
                full_area = IP2city(weibo["geo"])
                weibo["geo"] = full_area
                city = full_area.split("\t")[1]
            else:
                city = geo2city(weibo["geo"]).split("\t")[1]
        except:
            city = ""

        weibo["weibo_link"] = weiboinfo2url(weibo["user"], weibo["_id"])
        if city in province_list:
            if city in city_dict:
                city_dict[city].append(weibo)
            else:
                city_dict[city] = [weibo]
    return city_dict
예제 #9
0
def get_city_weibo(topic,
                   start_ts,
                   end_ts,
                   unit=MinInterval,
                   limit=TOP_WEIBOS_LIMIT):
    """Group the most-reposted stored weibos of a topic by province.

    ``CityWeibos`` rows are filtered on ``topic``, ``range == unit`` and
    ``limit``.  A span shorter than ``unit`` reads the single row whose
    ``end`` equals ``end_ts`` rounded up to a multiple of ``unit``; a longer
    span reads every row with ``lowbound < end <= upbound``.  The collected
    weibos are ordered by ``reposts_count`` descending, the first 1000 are
    annotated with ``'username'``, ``'time'`` and ``'weibo_link'``, and each
    is appended to the list of its city if the city is in ``province_list``.

    Returns a dict mapping city name to a list of weibo dicts.
    """
    common_filters = (CityWeibos.topic == topic,
                      CityWeibos.range == unit,
                      CityWeibos.limit == limit)
    ranked = []
    if end_ts - start_ts < unit:
        upbound = int(math.ceil(end_ts / (unit * 1.0)) * unit)
        row = db.session.query(CityWeibos).filter(
            CityWeibos.end == upbound, *common_filters).first()
        if row:
            for post in _json_loads(row.weibos):
                ranked.append((post['reposts_count'], post))
    else:
        upbound = int(math.ceil(end_ts / (unit * 1.0)) * unit)
        lowbound = (start_ts / unit) * unit
        rows = db.session.query(CityWeibos).filter(
            CityWeibos.end > lowbound,
            CityWeibos.end <= upbound,
            *common_filters).all()
        for row in rows:
            for post in _json_loads(row.weibos):
                try:
                    ranked.append((post['reposts_count'], post))
                except:
                    # Entries without a usable reposts_count are dropped.
                    continue
    ranked = sorted(ranked, key=lambda pair: pair[0], reverse=True)

    city_dict = {}
    for position, (_, post) in enumerate(ranked):
        if position >= 1000:
            # Only the 1000 most-reposted weibos are considered.
            break

        user_info = acquire_user_by_id(post['user'])
        post['username'] = user_info['name'] if user_info else '******'
        post['time'] = ts2date(post['timestamp'])
        try:
            if len(post['geo'].split('.')) == 4:
                full_area = IP2city(post['geo'])
                post['geo'] = full_area
                city = full_area.split('\t')[1]
            else:
                city = geo2city(post['geo']).split('\t')[1]
        except:
            city = ''
        post['weibo_link'] = weiboinfo2url(post['user'], post['_id'])
        if city in province_list:
            city_dict.setdefault(city, []).append(post)
    return city_dict
예제 #10
0
def community_result(community_user_list, topic, date, windowsize):
    """Collect weibos, top keywords and sentiment shares for a community.

    The topic window ends at ``datetime2ts(date)`` and starts
    ``windowsize * Day`` earlier.  The xapian handle for that window is
    searched with an ``$or`` query over ``int(uid)`` for every uid in
    ``community_user_list``.

    Returns a 4-tuple
    ``(sort_community_info, sort_top_keyword, new_sentiment_list, query_dict)``:

    * ``sort_community_info`` -- one list per weibo, sorted by reposts count
      (column 10) descending, laid out as
      ``[_id, name, location, friends_count, followers_count, created_at,
      statuses_count, profile_image_url, text, date, reposts_count, source,
      geo, comments_count, sentiment_name, weibo_link, domain]``
    * ``sort_top_keyword`` -- the ``top_keywords`` result, sorted on element
      1 descending
    * ``new_sentiment_list`` -- ``[sentiment_name, count, count / total]``
      per sentiment seen
    * ``query_dict`` -- the query that was issued

    When the search matches nothing, all four elements are ``None``.  The
    tuple keeps the same length as the normal result so that callers can
    unpack it unconditionally (the empty case used to yield only three
    elements).
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)

    query_dict = {'$or': [{'user': int(uid)} for uid in community_user_list]}
    count, weibo_results = xapian_search_weibo.search(query=query_dict,
                                                      fields=weibo_fields_list)
    if count == 0:
        return None, None, None, None

    community_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # No profile found: fill the user columns with placeholders.
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'

        text = weibo['text']
        # Own name so the ``date`` argument is not shadowed.
        weibo_date = ts2date(weibo['timestamp'])
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)
        domain = uid2domain(uid)

        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        community_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, weibo_date,
            reposts_count, source, geo, comments_count, sentiment_name,
            weibo_link, domain
        ])

    # Sort by reposts count, highest first.
    sort_community_info = sorted(community_info,
                                 key=lambda x: x[10],
                                 reverse=True)

    mset = xapian_search_weibo.search(query=query_dict,
                                      max_offset=50,
                                      mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo,
                                             mset,
                                             fields=['terms']),
                               top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)

    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])

    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict
예제 #11
0
def read_uid_weibos(topic, date, windowsize, uid):
    """Return the weibos of one user inside a topic window, oldest first.

    The window ends at ``datetime2ts(date)`` and starts ``Day * windowsize``
    earlier.  The xapian handle for that window is searched with
    ``{"user": uid}`` and every hit is turned into a list::

        [wid, uid, name, location, friends_count, followers_count,
         created_at, statuses_count, profile_image_url, date, text, geo,
         source, reposts_count, comments_count, weibo_link]

    Authors unknown to ``user_search`` get ``u"未知"`` in the six profile
    columns and ``u"no"`` as image url.  The rows are sorted ascending on
    column 9 (the ``ts2date`` value).
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - Day * windowsize
    weibo_index = getXapianWeiboByTopic(topic, start_ts, end_ts)

    user_query = {"user": uid}
    total, iter_hits = weibo_index.search(query=user_query, fields=weibo_fields_list)

    collected = []
    if not total == 0:
        for hit in iter_hits():
            wid = hit["_id"]
            owner = hit["user"]
            owner_info = user_search.search_by_id(owner, fields=user_fields_list)
            if owner_info:
                user_columns = [
                    owner_info["name"],
                    owner_info["location"],
                    owner_info["friends_count"],
                    owner_info["followers_count"],
                    owner_info["created_at"],
                    owner_info["statuses_count"],
                    owner_info["profile_image_url"],
                ]
            else:
                # name, location, friends, followers, created_at, statuses
                user_columns = [u"未知"] * 6 + [u"no"]

            text = hit["text"]
            geo = hit["geo"]
            source = hit["source"]
            day = ts2date(hit["timestamp"])
            reposts = hit["reposts_count"]
            comments = hit["comments_count"]
            link = weiboinfo2url(owner, wid)
            # Result unused; the call is retained to keep behaviour identical.
            uid2domain(owner)

            collected.append(
                [wid, owner]
                + user_columns
                + [day, text, geo, source, reposts, comments, link]
            )

    return sorted(collected, key=lambda row: row[9])
예제 #12
0
def c_weibo_by_ts(topic, date, windowsize, uid, network_type, cid, rank_method):
    """List the weibos posted by the community of a graph node.

    The gexf graph selected by ``network_type`` is read, the community
    members are obtained from ``get_community_user(g, uid, cid)``, and the
    topic window (ending at ``datetime2ts(date)``, ``Day * windowsize``
    long) is searched for weibos of those members.

    Each weibo becomes a list::

        [_id, name, location, friends_count, followers_count, created_at,
         statuses_count, profile_image_url, text, date, reposts_count,
         source, geo, comments_count, sentiment_name, weibo_link, uid,
         timestamp]

    With ``rank_method == 'reposts_count'`` the lists are sorted on column
    10 descending; any other value sorts on column 17 ascending.

    NOTE(review): the return shape is not uniform -- ``(None, None, None)``
    when ``acquire_real_topic_id`` yields nothing, ``None`` when the search
    matches nothing, a list otherwise.  Left unchanged here; confirm what
    callers expect before unifying.
    """
    real_topic_id = acquire_real_topic_id(topic, date, windowsize)
    if not real_topic_id:
        return None, None, None

    # The topic exists, so carry on with the computation.
    key_pre = '_'.join([str(real_topic_id), str(date), str(windowsize)])

    # Pick the graph file used to compute the community information.
    # NOTE(review): any other network_type leaves ``key`` unbound and the
    # read below fails -- behaviour kept as is.
    if network_type == 'source_graph':
        key = str(GRAPH_PATH) + key_pre + '_gg_graph.gexf'
    elif network_type == 'direct_superior_graph':
        key = str(GRAPH_PATH) + key_pre + '_ds_udg_graph.gexf'
    g = nx.read_gexf(key)

    # Nodes belonging to the community associated with node ``uid``.
    members = get_community_user(g, uid, cid)

    end_ts = datetime2ts(date)
    start_ts = end_ts - Day * windowsize
    searcher = getXapianWeiboByTopic(topic, start_ts, end_ts)

    query_dict = {'$or': []}
    for member in members:
        query_dict['$or'].append({'user': int(member)})

    count, weibo_results = searcher.search(query=query_dict,
                                           fields=weibo_fields_list)
    if count == 0:
        return None

    community_info = []
    for weibo in weibo_results():
        author = weibo['user']
        wid = weibo['_id']
        profile = user_search.search_by_id(author, fields=user_fields_list)
        if profile:
            name = profile['name']
            location = profile['location']
            friends_count = profile['friends_count']
            followers_count = profile['followers_count']
            created_at = profile['created_at']
            statuses_count = profile['statuses_count']
            profile_image_url = profile['profile_image_url']
        else:
            name = location = friends_count = u'未知'
            followers_count = created_at = statuses_count = u'未知'
            profile_image_url = u'no'

        text = weibo['text']
        timestamp = weibo['timestamp']
        posted_on = ts2date(timestamp)
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment_name = emotions_kv[weibo['sentiment']]
        weibo_link = weiboinfo2url(author, wid)

        community_info.append([
            wid, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, posted_on,
            reposts_count, source, geo, comments_count, sentiment_name,
            weibo_link, author, timestamp,
        ])

    if rank_method == 'reposts_count':
        # Highest reposts count first.
        community_info.sort(key=lambda row: row[10], reverse=True)
    else:
        # Oldest timestamp first.
        community_info.sort(key=lambda row: row[17])
    return community_info
예제 #13
0
def save_weibos(excel_name, topic, child_topic_list, w_limit):
    """Store the top weibos of every child topic as OpinionTestWeibos rows.

    For each entry of ``child_topic_list`` the sheet named
    ``str(int(entry))`` of the workbook ``excel_name`` is read, keeping at
    most ``w_limit`` rows as ``(text, weight)`` pairs (column 1 is the text,
    column 0 the weight; the sheet is expected to be sorted by weight,
    highest first).

    TODO (from the original author): the related weibo id and the other
    weibo fields still have to be looked up from the text.  Until then the
    remaining fields come from a hard-coded test index and time range
    (test use only): search hits are consumed five per child topic for
    child topics 0..10, and each hit's text is replaced by the spreadsheet
    text.

    Finally one ``OpinionTestWeibos(topic, i, json)`` row is written per
    index ``i`` in ``range(len(child_topic_list))``, replacing an existing
    row for the same topic/child topic, and the session is committed.

    Raises ``KeyError`` if the search produced no weibos for some index.
    """
    data = xlrd.open_workbook(excel_name)
    weibos_dict = {}
    for child_topic in child_topic_list:
        table_weibos = data.sheet_by_name(str(int(child_topic)))
        # The rows are already sorted by weight (descending), so keeping
        # the first w_limit rows keeps the heaviest ones.
        n_rows = min(table_weibos.nrows, w_limit)
        pairs = []
        for row_index in range(n_rows):
            line = table_weibos.row_values(row_index)
            weibo_weight = line[0]
            weibo_text = line[1]
            # TODO: what should be stored here is the complete weibo
            # (with username etc. resolved), not only the text.
            pairs.append((weibo_text, weibo_weight))
        weibos_dict[child_topic] = pairs

    # Fetch concrete weibo records -- for testing only.
    s = XapianSearch(path='/home/ubuntu3/huxiaoqian/case/20140724/20140804/', name='master_timeline_weibo',schema_version='5')
    begin_ts = 1378050300
    end_ts = 1378051200
    query_dict = {
        'timestamp': {'$gt':begin_ts, '$lt': end_ts},
        'message_type' : 2
    }
    weibos_dict_new = {}
    scount, weibo_results = s.search(query=query_dict, fields=fields_list)

    i = 0  # child topic index
    j = 0  # position inside the current child topic (0..4)
    for weibo in weibo_results():
        if i == 11:
            break
        weibo['text'] = weibos_dict[str(i)][j][0]
        # Resolve username, profile image and weibo url.
        username, profileimage = getuserinfo(weibo['user'])
        weibo['username'] = username
        weibo['profile_image_url'] = profileimage
        weibo['timestamp'] = ts2date(weibo['timestamp'])
        weibo['weibo_link'] = weiboinfo2url(weibo['user'], weibo['_id'])

        weight = weibos_dict[str(i)][j][1]
        weibos_dict_new.setdefault(i, []).append((weibo, weight))
        if j == 4:
            j = 0
            i += 1
        else:
            j += 1

    for i in range(len(child_topic_list)):
        item = OpinionTestWeibos(topic, i, json.dumps(weibos_dict_new[i]))
        item_exist = db.session.query(OpinionTestWeibos).filter(OpinionTestWeibos.topic==topic, \
                                                                OpinionTestWeibos.child_topic==i).first()
        if item_exist:
            db.session.delete(item_exist)
        db.session.add(item)
    db.session.commit()
예제 #14
0
def get_info(neighbor_list, topic, date, windowsize):
    """Collect weibos, top keywords and sentiment shares for a set of users.

    The topic window ends at ``datetime2ts(date)`` and starts
    ``windowsize * Day`` earlier.  The xapian handle for that window is
    searched with an ``$or`` query over ``int(uid)`` for every uid in
    ``neighbor_list``.

    Returns a 4-tuple
    ``(sort_neighbor_info, sort_top_keyword, new_sentiment_list, query_dict)``:

    * ``sort_neighbor_info`` -- one list per weibo, sorted by reposts count
      (column 10) descending, laid out as
      ``[_id, name, location, friends_count, followers_count, created_at,
      statuses_count, profile_image_url, text, date, reposts_count, source,
      geo, comments_count, sentiment_name, weibo_link, uid]``
    * ``sort_top_keyword`` -- the ``top_keywords`` result, sorted on element
      1 descending
    * ``new_sentiment_list`` -- ``[sentiment_name, count, count / total]``
      per sentiment seen
    * ``query_dict`` -- the query that was issued

    When the search matches nothing, all four elements are ``None``.  The
    tuple keeps the same length as the normal result so that callers can
    unpack it unconditionally (the empty case used to yield only three
    elements).
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)

    query_dict = {'$or': [{'user': int(uid)} for uid in neighbor_list]}
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        return None, None, None, None

    neighbor_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo['user']
        _id = weibo['_id']
        result = user_search.search_by_id(uid, fields=user_fields_list)
        if result:
            name = result['name']
            location = result['location']
            friends_count = result['friends_count']
            followers_count = result['followers_count']
            created_at = result['created_at']
            statuses_count = result['statuses_count']
            profile_image_url = result['profile_image_url']
        else:
            # No profile found: fill the user columns with placeholders.
            name = u'未知'
            location = u'未知'
            friends_count = u'未知'
            followers_count = u'未知'
            created_at = u'未知'
            statuses_count = u'未知'
            profile_image_url = u'no'

        text = weibo['text']
        # Own name so the ``date`` argument is not shadowed.
        weibo_date = ts2date(weibo['timestamp'])
        reposts_count = weibo['reposts_count']
        source = weibo['source']
        geo = weibo['geo']
        comments_count = weibo['comments_count']
        sentiment = weibo['sentiment']
        sentiment_name = emotions_kv[sentiment]
        weibo_link = weiboinfo2url(uid, _id)

        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1
        neighbor_info.append([
            _id, name, location, friends_count, followers_count, created_at,
            statuses_count, profile_image_url, text, weibo_date,
            reposts_count, source, geo, comments_count, sentiment_name,
            weibo_link, uid
        ])

    # Sort by reposts count, highest first.
    sort_neighbor_info = sorted(neighbor_info, key=lambda x: x[10], reverse=True)

    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda x: x[1], reverse=True)

    new_sentiment_list = []
    for sentiment in sentiment_count:
        sentiment_ch = emotions_kv[int(sentiment)]
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([sentiment_ch, num, ratio])

    return sort_neighbor_info, sort_top_keyword, new_sentiment_list, query_dict
예제 #15
0
def time_domain_top_user(topic, date, windowsize, domain, rank_method):
    #results = {'folk':[], 'media':[], 'opinion_leader':[], 'oversea':[], 'other':[]}
    print 'topic, date, windowsize:', topic.encode('utf-8'), date, windowsize
    #domain_list = ['folk', 'media', 'opinion_leader', 'oversea', 'other']
    items = db.session.query(FirstDomainUser).filter(FirstDomainUser.topic==topic ,\
                                                     FirstDomainUser.date==date ,\
                                                     FirstDomainUser.windowsize==windowsize ,\
                                                     FirstDomainUser.user_domain==domain).all()
    results = []
    for item in items:
        domain = item.user_domain
        domain_name = domain_dict[domain]
        timestamp = item.timestamp
        timestamp = ts2date(timestamp)
        uid = item.uid

        weibo_info = json.loads(item.weibo_info)
        text = weibo_info['text']
        geo = weibo_info['geo']
        source = weibo_info['source']
        _id = weibo_info['_id']
        weibo_link = weiboinfo2url(uid, _id)

        user_info = json.loads(item.user_info)
        uname = user_info['name']
        location = user_info['location']
        profile_image_url = user_info['profile_image_url']
        if profile_image_url == u'未知':
            profile_image_url = ''
        friends_count = user_info['friends_count']
        followers_count = user_info['followers_count']
        statuses_count = user_info['statuses_count']

        if friends_count == u'未知':
            friends_count = -1
        if statuses_count == u'未知':
            statuses_count = -1

        created_at = user_info['created_at']

        #rank = item.rank
        row = [
            uid, uname, location, domain_name, timestamp, text,
            profile_image_url, friends_count, followers_count, statuses_count,
            created_at, geo, source, weibo_link, _id
        ]
        results.append(row)
    sorted_results = []
    if rank_method == 'timestamp':
        sorted_results = sorted(results, key=lambda x: x[4])
    elif rank_method == 'friends_count':
        sorted_results = sorted(results, key=lambda x: x[7], reverse=True)
    elif rank_method == 'statuses_count':
        sorted_results = sorted(results, key=lambda x: x[9], reverse=True)

    new_results = []
    for i in range(len(items)):
        new_row = [i + 1]
        for j in range(len(sorted_results[0])):
            if j == 7 and sorted_results[i][j] == -1:
                new_row.append(u'未知')
            elif j == 9 and sorted_results[i][j] == -1:
                new_row.append(u'未知')
            else:
                new_row.append(sorted_results[i][j])
        new_results.append(new_row)

    return new_results
예제 #16
0
def time_top_user(topic, date, windowsize, rank_method):
    results = []
    print 'topic, date, windowsize:', topic.encode('utf-8'), date, windowsize
    items = db.session.query(FirstUser).filter(FirstUser.topic==topic ,\
                                               FirstUser.date==date ,\
                                               FirstUser.windowsize==windowsize).all()
    #print 'len(items):', len(items)
    if items:
        for item in items:
            uid = item.uid
            timestamp = item.timestamp
            timestamp = ts2date(timestamp)
            user_info = json.loads(item.user_info)
            uname = user_info['name']
            location = user_info['location']
            profile_image_url = user_info['profile_image_url']
            if profile_image_url == u'未知':
                profile_image_url = ''
            friends_count = user_info['friends_count']
            followers_count = user_info['followers_count']
            statuses_count = user_info['statuses_count']

            if friends_count == u'未知':
                friends_count = -1
            if statuses_count == u'未知':
                statuses_count = -1

            created_at = user_info['created_at']

            weibo_info = json.loads(item.weibo_info)
            text = weibo_info['text']
            geo = weibo_info['geo']
            source = weibo_info['source']
            _id = weibo_info['_id']
            weibo_link = weiboinfo2url(uid, _id)
            user_domain = item.user_domain
            domain_name = domain_dict[user_domain]
            row = [
                uid, uname, location, domain_name, timestamp, text,
                profile_image_url, friends_count, followers_count,
                statuses_count, created_at, geo, source, weibo_link, _id
            ]
            results.append(row)
    #print 'results:', results
    sorted_results = []
    #print 'rank_method:', rank_method
    if rank_method == 'timestamp':
        sorted_results = sorted(results, key=lambda x: x[4])
    elif rank_method == 'friends_count':
        sorted_results = sorted(results, key=lambda x: x[7], reverse=True)
    elif rank_method == 'statuses_count':
        sorted_results = sorted(results, key=lambda x: x[9], reverse=True)
    #print 'sorted_results',sorted_results
    #print 'sorted_results[0]:', sorted_results[0]
    new_results = []
    for i in range(len(items)):
        new_row = [i + 1]
        for j in range(len(sorted_results[0])):
            if j == 7 and sorted_results[i][j] == -1:
                new_row.append(u'未知')
            elif j == 9 and sorted_results[i][j] == -1:
                new_row.append(u'未知')
            else:
                new_row.append(sorted_results[i][j])
        new_results.append(new_row)
    return new_results
예제 #17
0
def community_result(community_user_list, topic, date, windowsize):
    """Collect weibos, top keywords and sentiment stats for a community.

    Searches the topic's Xapian weibo index over the window
    ``[datetime2ts(date) - windowsize * Day, datetime2ts(date)]`` for weibos
    posted by any uid in ``community_user_list``.

    Returns a 4-tuple ``(sort_community_info, sort_top_keyword,
    new_sentiment_list, query_dict)``:

    * ``sort_community_info`` -- one list per weibo
      ``[_id, name, location, friends_count, followers_count, created_at,
      statuses_count, profile_image_url, text, date, reposts_count, source,
      geo, comments_count, sentiment_name, weibo_link, domain]``,
      sorted by ``reposts_count`` descending
    * ``sort_top_keyword`` -- result of ``top_keywords`` sorted by the second
      element of each entry, descending
    * ``new_sentiment_list`` -- ``[sentiment_name, num, ratio]`` per
      sentiment, where ``ratio`` is ``num / count``
    * ``query_dict`` -- the ``$or`` query that was used

    When the search matches nothing, four ``None`` values are returned so
    the result can always be unpacked the same way (the original returned
    only three on this path).
    """
    end_ts = datetime2ts(date)
    start_ts = end_ts - windowsize * Day
    xapian_search_weibo = getXapianWeiboByTopic(topic, start_ts, end_ts)

    query_dict = {"$or": [{"user": int(uid)} for uid in community_user_list]}
    count, weibo_results = xapian_search_weibo.search(query=query_dict, fields=weibo_fields_list)
    if count == 0:
        return None, None, None, None

    community_info = []
    sentiment_count = {}
    for weibo in weibo_results():
        uid = weibo["user"]
        _id = weibo["_id"]

        user = user_search.search_by_id(uid, fields=user_fields_list)
        if user:
            name = user["name"]
            location = user["location"]
            friends_count = user["friends_count"]
            followers_count = user["followers_count"]
            created_at = user["created_at"]
            statuses_count = user["statuses_count"]
            profile_image_url = user["profile_image_url"]
        else:
            # No profile found for this uid: fill in placeholder values.
            name = u"未知"
            location = u"未知"
            friends_count = u"未知"
            followers_count = u"未知"
            created_at = u"未知"
            statuses_count = u"未知"
            profile_image_url = u"no"

        sentiment = weibo["sentiment"]
        sentiment_count[sentiment] = sentiment_count.get(sentiment, 0) + 1

        # Named weibo_date so the ``date`` parameter is not shadowed.
        weibo_date = ts2date(weibo["timestamp"])
        community_info.append(
            [
                _id,
                name,
                location,
                friends_count,
                followers_count,
                created_at,
                statuses_count,
                profile_image_url,
                weibo["text"],
                weibo_date,
                weibo["reposts_count"],
                weibo["source"],
                weibo["geo"],
                weibo["comments_count"],
                emotions_kv[sentiment],
                weiboinfo2url(uid, _id),
                uid2domain(uid),
            ]
        )

    # Index 10 is reposts_count: most reposted weibos first.
    sort_community_info = sorted(community_info, key=lambda row: row[10], reverse=True)

    mset = xapian_search_weibo.search(query=query_dict, max_offset=50, mset_direct=True)
    top_keyword = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=["terms"]), top=50)
    sort_top_keyword = sorted(top_keyword, key=lambda entry: entry[1], reverse=True)

    new_sentiment_list = []
    for sentiment in sentiment_count:
        num = sentiment_count[sentiment]
        ratio = float(num) / float(count)
        new_sentiment_list.append([emotions_kv[int(sentiment)], num, ratio])

    return sort_community_info, sort_top_keyword, new_sentiment_list, query_dict
예제 #18
0
def save_weibos(excel_name, topic, child_topic_list,
                w_limit):
    """Load per-child-topic weibo texts from an Excel file and store them.

    For every entry ``i`` of ``child_topic_list`` the sheet named
    ``str(int(i))`` is read and at most ``w_limit`` leading rows are kept as
    ``(text, weight)`` pairs (column 1 is the text, column 0 the weight).
    The texts are then attached to weibos fetched from a fixed Xapian index
    (test data only, per the original comment), and one ``OpinionTestWeibos``
    row per child topic index is written, replacing any existing row for
    the same ``topic``/``child_topic``.

    Returns nothing; the result is committed through ``db.session``.
    """
    data = xlrd.open_workbook(excel_name)
    weibos_dict = {}
    for i in child_topic_list:
        table_weibos = data.sheet_by_name(str(int(i)))
        # The original comment states the rows are already ordered by weight,
        # largest first, so taking the leading rows keeps the heaviest ones.
        # The original referenced a misspelled name here and raised
        # NameError whenever the sheet had <= w_limit rows.
        n_rows = min(table_weibos.nrows, w_limit)
        pairs = []
        for j in range(n_rows):
            # TODO (from original): look up the full weibo that matches this
            # text instead of storing only the text and weight.
            line = table_weibos.row_values(j)
            pairs.append((line[1], line[0]))
        weibos_dict[i] = pairs

    # Fetch concrete weibo records; for testing only (per original comment).
    s = XapianSearch(path='/home/ubuntu3/huxiaoqian/case/20140724/20140804/',
                     name='master_timeline_weibo',
                     schema_version='5')
    begin_ts = 1378050300
    end_ts = 1378051200
    query_dict = {
        'timestamp': {
            '$gt': begin_ts,
            '$lt': end_ts
        },
        'message_type': 2
    }
    weibos_dict_new = {}
    scount, weibo_results = s.search(query=query_dict, fields=fields_list)

    # Walk the search results, filling 5 weibos (j = 0..4) for each of the
    # child topics 0..10.
    # NOTE(review): this indexes weibos_dict by str(i), so it assumes the
    # entries of child_topic_list are the strings '0'..'10' and that each
    # sheet supplied at least 5 rows -- confirm against callers.
    i = 0
    j = 0
    for weibo in weibo_results():
        if i == 11:
            break
        text, weight = weibos_dict[str(i)][j][0], weibos_dict[str(i)][j][1]
        weibo['text'] = text
        # Attach username, profile image and weibo URL.
        username, profileimage = getuserinfo(weibo['user'])
        weibo['username'] = username
        weibo['profile_image_url'] = profileimage
        weibo['timestamp'] = ts2date(weibo['timestamp'])
        weibo['weibo_link'] = weiboinfo2url(weibo['user'], weibo['_id'])
        weibos_dict_new.setdefault(i, []).append((weibo, weight))
        if j == 4:
            j = 0
            i += 1
        else:
            j += 1

    # Replace any previously stored row for each child topic index.
    for i in range(len(child_topic_list)):
        item = OpinionTestWeibos(topic, i, json.dumps(weibos_dict_new[i]))
        item_exist = db.session.query(OpinionTestWeibos).filter(
            OpinionTestWeibos.topic == topic,
            OpinionTestWeibos.child_topic == i).first()
        if item_exist:
            db.session.delete(item_exist)
        db.session.add(item)
    db.session.commit()
예제 #19
0
def time_domain_top_user(topic, date, windowsize, domain, rank_method):
    # results = {'folk':[], 'media':[], 'opinion_leader':[], 'oversea':[], 'other':[]}
    print "topic, date, windowsize:", topic.encode("utf-8"), date, windowsize
    # domain_list = ['folk', 'media', 'opinion_leader', 'oversea', 'other']
    items = (
        db.session.query(FirstDomainUser)
        .filter(
            FirstDomainUser.topic == topic,
            FirstDomainUser.date == date,
            FirstDomainUser.windowsize == windowsize,
            FirstDomainUser.user_domain == domain,
        )
        .all()
    )
    results = []
    for item in items:
        domain = item.user_domain
        domain_name = domain_dict[domain]
        timestamp = item.timestamp
        timestamp = ts2date(timestamp)
        uid = item.uid

        weibo_info = json.loads(item.weibo_info)
        text = weibo_info["text"]
        geo = weibo_info["geo"]
        source = weibo_info["source"]
        _id = weibo_info["_id"]
        weibo_link = weiboinfo2url(uid, _id)

        user_info = json.loads(item.user_info)
        uname = user_info["name"]
        location = user_info["location"]
        profile_image_url = user_info["profile_image_url"]
        if profile_image_url == u"未知":
            profile_image_url = ""
        friends_count = user_info["friends_count"]
        followers_count = user_info["followers_count"]
        statuses_count = user_info["statuses_count"]

        if friends_count == u"未知":
            friends_count = -1
        if statuses_count == u"未知":
            statuses_count = -1

        created_at = user_info["created_at"]

        # rank = item.rank
        row = [
            uid,
            uname,
            location,
            domain_name,
            timestamp,
            text,
            profile_image_url,
            friends_count,
            followers_count,
            statuses_count,
            created_at,
            geo,
            source,
            weibo_link,
            _id,
        ]
        results.append(row)
    sorted_results = []
    if rank_method == "timestamp":
        sorted_results = sorted(results, key=lambda x: x[4])
    elif rank_method == "friends_count":
        sorted_results = sorted(results, key=lambda x: x[7], reverse=True)
    elif rank_method == "statuses_count":
        sorted_results = sorted(results, key=lambda x: x[9], reverse=True)

    new_results = []
    for i in range(len(items)):
        new_row = [i + 1]
        for j in range(len(sorted_results[0])):
            if j == 7 and sorted_results[i][j] == -1:
                new_row.append(u"未知")
            elif j == 9 and sorted_results[i][j] == -1:
                new_row.append(u"未知")
            else:
                new_row.append(sorted_results[i][j])
        new_results.append(new_row)

    return new_results
예제 #20
0
def time_top_user(topic, date, windowsize, rank_method):
    results = []
    print "topic, date, windowsize:", topic.encode("utf-8"), date, windowsize
    items = (
        db.session.query(FirstUser)
        .filter(FirstUser.topic == topic, FirstUser.date == date, FirstUser.windowsize == windowsize)
        .all()
    )
    # print 'len(items):', len(items)
    if items:
        for item in items:
            uid = item.uid
            timestamp = item.timestamp
            timestamp = ts2date(timestamp)
            user_info = json.loads(item.user_info)
            uname = user_info["name"]
            location = user_info["location"]
            profile_image_url = user_info["profile_image_url"]
            if profile_image_url == u"未知":
                profile_image_url = ""
            friends_count = user_info["friends_count"]
            followers_count = user_info["followers_count"]
            statuses_count = user_info["statuses_count"]

            if friends_count == u"未知":
                friends_count = -1
            if statuses_count == u"未知":
                statuses_count = -1

            created_at = user_info["created_at"]

            weibo_info = json.loads(item.weibo_info)
            text = weibo_info["text"]
            geo = weibo_info["geo"]
            source = weibo_info["source"]
            _id = weibo_info["_id"]
            weibo_link = weiboinfo2url(uid, _id)
            user_domain = item.user_domain
            domain_name = domain_dict[user_domain]
            row = [
                uid,
                uname,
                location,
                domain_name,
                timestamp,
                text,
                profile_image_url,
                friends_count,
                followers_count,
                statuses_count,
                created_at,
                geo,
                source,
                weibo_link,
                _id,
            ]
            results.append(row)
    # print 'results:', results
    sorted_results = []
    # print 'rank_method:', rank_method
    if rank_method == "timestamp":
        sorted_results = sorted(results, key=lambda x: x[4])
    elif rank_method == "friends_count":
        sorted_results = sorted(results, key=lambda x: x[7], reverse=True)
    elif rank_method == "statuses_count":
        sorted_results = sorted(results, key=lambda x: x[9], reverse=True)
    # print 'sorted_results',sorted_results
    # print 'sorted_results[0]:', sorted_results[0]
    new_results = []
    for i in range(len(items)):
        new_row = [i + 1]
        for j in range(len(sorted_results[0])):
            if j == 7 and sorted_results[i][j] == -1:
                new_row.append(u"未知")
            elif j == 9 and sorted_results[i][j] == -1:
                new_row.append(u"未知")
            else:
                new_row.append(sorted_results[i][j])
        new_results.append(new_row)
    return new_results