def geo_list(r, topic):
    """Build the location record for a single weibo.

    A repost (``message_type == 3``) has its root weibo looked up by
    ``root_mid`` in the ``topic`` index so that both the origin location and
    the repost location can be reported.  Every other message type is
    recorded as an original post.

    Args:
        r: mapping describing one weibo.  Reads ``message_type``, ``geo``,
            ``mid``, ``timestamp`` and, for reposts, ``root_mid``.
        topic: name of the index that is searched for the root weibo; it is
            also stored in the returned record.

    Returns:
        A dict ``{'original', 'mid', 'topic', 'ts', 'origin_location',
        'repost_location'}`` where the locations are the string form of
        ``split_city``'s result, or ``None`` when a location is missing,
        cannot be resolved or is unknown, or when the root weibo of a
        repost cannot be found.
    """
    is_repost = r['message_type'] == 3

    # A weibo without usable geo text cannot be located: report "no record"
    # instead of raising.
    try:
        geo = r['geo'].encode('utf8')
    except (KeyError, AttributeError, UnicodeError):
        return None

    try:
        # The location is kept as the string form of split_city()'s result.
        own_location = str(split_city(geo))
    except Exception:
        return None

    # The string form of an unknown location starts with an opening bracket
    # and a quote followed by 'unknown', so characters 2-3 are 'un'.
    if own_location[2:4] == 'un':
        # Checked before any lookup: an unknown location always yields None,
        # so a repost does not need its root weibo fetched.
        return None

    if not is_repost:
        return {
            'original': 1,
            'mid': r['mid'],
            'topic': topic,
            'ts': r['timestamp'],
            'origin_location': own_location,
            'repost_location': None,
        }

    root_mid = r['root_mid']
    if not root_mid:
        return None

    # Fetch the root weibo to learn where the reposted content originated.
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'term': {
                        'mid': root_mid
                    }
                }
            }
        }
    }
    hits = weibo_es.search(index=topic,
                           doc_type=weibo_index_type,
                           body=query_body)['hits']['hits']
    if not hits:
        return None

    try:
        origin_location = str(
            split_city(hits[0]['_source']['geo'].encode('utf8')))
    except Exception:
        return None
    if origin_location[2:4] == 'un':
        return None

    return {
        'original': 0,
        'mid': r['mid'],
        'topic': topic,
        'ts': r['timestamp'],
        'origin_location': origin_location,
        'repost_location': own_location,
    }
Exemplo n.º 2
0
def cityTopic(uids_list, flow_text_index_name, n_limit=TOP_WEIBOS_LIMIT):
    """Count weibos per province and city for a set of users.

    Searches ``flow_text_index_name`` for weibos whose ``uid`` is in
    ``uids_list``, ordered by ``SORT_FIELD`` descending and limited to
    ``n_limit`` hits, and tallies the location of every hit that has a
    known province.

    Args:
        uids_list: user ids used in the ``terms`` filter.
        flow_text_index_name: index to search; nothing is searched when it
            is empty or ``None``.
        n_limit: maximum number of hits requested from the search.

    Returns:
        ``{province: {city: count, ..., 'total': count}}``, or ``None``
        when ``flow_text_index_name`` is empty.
    """
    if not flow_text_index_name:
        return None

    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'terms': {
                        'uid': uids_list
                    }
                }
            }
        },
        'sort': {SORT_FIELD: {"order": "desc"}},
        'size': n_limit
    }
    hits = es_flow_text.search(index=flow_text_index_name,
                               doc_type=flow_text_index_type,
                               body=query_body)['hits']['hits']

    province_dict = {}
    for weibo in hits:
        # Hits without usable geo text are skipped.
        try:
            geo = weibo['_source']['geo'].encode('utf8')
        except (KeyError, TypeError, AttributeError, UnicodeError):
            continue

        province, city = split_city(geo)
        if province == 'unknown':
            continue

        # One entry per province holding a counter per city plus 'total'.
        cities = province_dict.setdefault(province, {})
        cities[city] = cities.get(city, 0) + 1
        cities['total'] = cities.get('total', 0) + 1

    return province_dict
Exemplo n.º 3
0
def cityTopic(flow_text_index_name, n_limit=TOP_WEIBOS_LIMIT):
    """Count weibos per province and city across a whole index.

    Runs a ``match_all`` search on ``flow_text_index_name``, ordered by
    ``SORT_FIELD`` descending and limited to ``n_limit`` hits, and tallies
    the location of every hit that has a known province.

    Args:
        flow_text_index_name: index to search; nothing is searched when it
            is empty or ``None``.
        n_limit: maximum number of hits requested from the search.

    Returns:
        ``{province: {city: count, ..., 'total': count}}`` (empty when the
        search returns no hits), or ``None`` when ``flow_text_index_name``
        is empty.
    """
    if not flow_text_index_name:
        return None

    query_body = {
        'query': {
            'match_all': {}
        },
        'sort': {SORT_FIELD: {"order": "desc"}},
        'size': n_limit
    }
    hits = weibo_es.search(index=flow_text_index_name,
                           doc_type=flow_text_index_type,
                           body=query_body)['hits']['hits']

    # An empty hit list simply produces an empty result.
    province_dict = {}
    for weibo in hits:
        # Hits without usable geo text are skipped.
        try:
            geo = weibo['_source']['geo'].encode('utf8')
        except (KeyError, TypeError, AttributeError, UnicodeError):
            continue

        province, city = split_city(geo)
        if province == 'unknown':
            continue

        # One entry per province holding a counter per city plus 'total'.
        cities = province_dict.setdefault(province, {})
        cities[city] = cities.get(city, 0) + 1
        cities['total'] = cities.get('total', 0) + 1

    return province_dict
Exemplo n.º 4
0
def cityTopic(topic,
              start_ts,
              over_ts,
              during=Fifteenminutes,
              n_limit=TOP_WEIBOS_LIMIT):
    """Collect located weibos and per-location counts for each message type.

    For every value in ``mtype_kv`` the ``topic`` index is searched for
    weibos of that message type with ``start_ts <= timestamp < over_ts``,
    ordered by ``SORT_FIELD`` descending.

    Args:
        topic: index to search; nothing is searched when it is empty.
        start_ts: inclusive lower bound of the timestamp range.
        over_ts: exclusive upper bound of the timestamp range.
        during: accepted for signature compatibility; not used here.
        n_limit: only hits among the first ``n_limit`` of a message type
            are kept in ``geo_weibos``.

    Returns:
        ``{'geo_weibos': {mtype: [[province, city, hit], ...]},
        'geo_cityCount': {mtype: {province: {city: n, 'total': n}}}}``, or
        ``None`` when ``topic`` is empty.  Message types without hits do
        not appear under ``geo_cityCount``.
    """
    if not topic:
        return None

    geo_cityTopic_results = {'geo_weibos': {}, 'geo_cityCount': {}}
    kept_weibos = geo_cityTopic_results['geo_weibos']

    for _, mtype in mtype_kv.iteritems():
        query_body = {
            'query': {
                'bool': {
                    'must': [{
                        'term': {
                            'message_type': mtype
                        }
                    }, {
                        'range': {
                            'timestamp': {
                                'gte': start_ts,
                                'lt': over_ts
                            }
                        }
                    }]
                }
            },
            'sort': {
                SORT_FIELD: {
                    "order": "desc"
                }
            },
            'size': 10000000
        }
        hits = weibo_es.search(index=topic,
                               doc_type=weibo_index_type,
                               body=query_body)['hits']['hits']
        if not hits:
            continue

        # A fresh counter per message type, so that each type reports its
        # own counts rather than all types sharing one accumulated dict.
        province_dict = {}

        # The position counts every hit, including those skipped for lack
        # of geo text, so n_limit applies to the raw hit order.
        for position, weibo in enumerate(hits, 1):
            try:
                geo = weibo['_source']['geo'].encode('utf8')
            except (KeyError, TypeError, AttributeError, UnicodeError):
                continue

            province, city = split_city(geo)

            # Hits with an unknown province are still kept here; only the
            # counters below exclude them.
            if position <= n_limit:
                kept_weibos.setdefault(mtype, []).append(
                    [province, city, weibo])

            if province != 'unknown':
                cities = province_dict.setdefault(province, {})
                cities[city] = cities.get(city, 0) + 1
                cities['total'] = cities.get('total', 0) + 1

        geo_cityTopic_results['geo_cityCount'][mtype] = province_dict

    return geo_cityTopic_results
Exemplo n.º 5
0
def cityTopic(topic,
              start_ts,
              over_ts,
              during=Fifteenminutes,
              n_limit=TOP_WEIBOS_LIMIT):
    """Count and save located weibos per time window and message type.

    ``over_ts`` is first adjusted with ``ts2HourlyTime(over_ts, during)``.
    The span back to ``start_ts`` is then walked in windows of ``during``,
    oldest window first.  For each window and each value in ``mtype_kv``
    the ``topic`` index is searched for up to ``n_limit`` weibos of that
    type (ordered by ``SORT_FIELD`` descending); every hit with a known
    province is counted and passed to ``save_ws_results``, and the counts
    are passed to ``save_rt_results``.

    Args:
        topic: index to search; nothing happens when it is empty.
        start_ts: start of the span; converted with ``int``.
        over_ts: end of the span; converted with ``int``.
        during: window length, in the same unit as the timestamps.
        n_limit: maximum number of hits requested per search.

    Returns:
        None.  Results are handed to ``save_ws_results`` and
        ``save_rt_results``.
    """
    if not topic:
        return

    start_ts = int(start_ts)
    over_ts = ts2HourlyTime(int(over_ts), during)
    # Floor division keeps the window count an integer.
    interval = (over_ts - start_ts) // during

    for i in range(interval, 0, -1):
        begin_ts = over_ts - during * i
        end_ts = begin_ts + during
        # message type -> [window end, {province: {city: n, 'total': n}}]
        mtype_ccount = {}

        for _, mtype in mtype_kv.iteritems():
            query_body = {
                'query': {
                    'bool': {
                        'must': [{
                            'term': {
                                'message_type': mtype
                            }
                        }, {
                            'range': {
                                'timestamp': {
                                    'gte': begin_ts,
                                    'lt': end_ts
                                }
                            }
                        }]
                    }
                },
                'sort': {
                    SORT_FIELD: {
                        "order": "desc"
                    }
                },
                'size': n_limit
            }
            hits = weibo_es.search(index=topic,
                                   doc_type=weibo_index_type,
                                   body=query_body)['hits']['hits']
            if not hits:
                continue
            first_item = hits[0]['_source']

            province_dict = {}
            for weibo in hits:
                # Hits without usable geo text are skipped.
                try:
                    geo = weibo['_source']['geo'].encode('utf8')
                except (KeyError, TypeError, AttributeError, UnicodeError):
                    continue

                province, city = split_city(geo)
                if province == 'unknown':
                    continue

                # Create the province entry only when it is missing, so a
                # new city never discards counts already collected for the
                # province's other cities.
                cities = province_dict.setdefault(province, {})
                cities[city] = cities.get(city, 0) + 1
                cities['total'] = cities.get('total', 0) + 1

                save_ws_results(topic, end_ts, during, n_limit,
                                province, city, weibo)

            mtype_ccount[mtype] = [end_ts, province_dict]
            # Saved after each non-empty message type, with the counts of
            # all types handled so far in this window.
            save_rt_results(topic, mtype_ccount, during, first_item)
def cityTopic(topic, start_ts, over_ts, during=Fifteenminutes, n_limit=TOP_WEIBOS_LIMIT):
    """Add per-location weibo counts to an event's stored geo results.

    The existing ``geo_results`` JSON of the ``topic`` document in
    ``event_analysis_name`` is loaded (an empty dict is used when it is
    missing, unparsable or not a JSON object).  ``over_ts`` is adjusted
    with ``ts2HourlyTime(over_ts, during)`` and the span back to
    ``start_ts`` is walked in windows of ``during``.  For each window and
    each value in ``mtype_kv`` up to ``n_limit`` weibos of that type are
    fetched from the ``topic`` index and every hit with a known province
    increments ``geo_result[mtype][province][city]`` and
    ``geo_result[mtype][province]['total']``.  The result is then passed
    to ``save_rt_results_es``.

    Args:
        topic: event id and index name; nothing happens when it is empty.
        start_ts: start of the span; converted with ``int``.
        over_ts: end of the span; converted with ``int``.
        during: window length, in the same unit as the timestamps.
        n_limit: maximum number of hits requested per search.

    Returns:
        The updated ``geo_result`` dict, or ``None`` when ``topic`` is
        empty.
    """
    if not topic:
        return None

    start_ts = int(start_ts)
    over_ts = ts2HourlyTime(int(over_ts), during)
    # Floor division keeps the window count an integer.
    interval = (over_ts - start_ts) // during

    item_exist = es_event.get(index=event_analysis_name,
                              doc_type=event_type,
                              id=topic)['_source']
    try:
        geo_result = json.loads(item_exist['geo_results'])
    except (KeyError, TypeError, ValueError):
        geo_result = {}
    if not isinstance(geo_result, dict):
        geo_result = {}
    # NOTE(review): keys loaded from JSON are always strings; if the values
    # of mtype_kv are not strings, new counts land under separate keys from
    # the loaded ones -- confirm against mtype_kv and save_rt_results_es.

    for i in range(interval, 0, -1):
        begin_ts = over_ts - during * i
        end_ts = begin_ts + during

        for _, mtype in mtype_kv.iteritems():
            query_body = {
                'query': {
                    'bool': {
                        'must': [
                            {'term': {'message_type': mtype}},
                            {'range': {
                                'timestamp': {'gte': begin_ts, 'lt': end_ts}
                            }}
                        ]
                    }
                },
                'sort': {SORT_FIELD: {"order": "desc"}},
                'size': n_limit
            }
            hits = es_event.search(index=topic,
                                   doc_type=event_text_type,
                                   body=query_body)['hits']['hits']

            for weibo in hits:
                # Hits without usable geo text are skipped.
                try:
                    geo = weibo['_source']['geo'].encode('utf8')
                except (KeyError, TypeError, AttributeError, UnicodeError):
                    continue

                province, city = split_city(geo)
                if province == 'unknown':
                    continue

                # Create (or replace a malformed) level only when needed so
                # existing counters are never reset.
                by_province = geo_result.get(mtype)
                if not isinstance(by_province, dict):
                    by_province = geo_result[mtype] = {}
                cities = by_province.get(province)
                if not isinstance(cities, dict):
                    cities = by_province[province] = {}
                cities[city] = cities.get(city, 0) + 1
                cities['total'] = cities.get('total', 0) + 1

    save_rt_results_es(topic, geo_result)

    return geo_result