Exemplo n.º 1
0
def propagateCronTopic(topic, xapian_search_weibo, start_ts, over_ts, sort_field=SORT_FIELD,
                       save_fields=RESP_ITER_KEYS, during=Fifteenminutes,
                       w_limit=TOP_WEIBOS_LIMIT, k_limit=TOP_KEYWORDS_LIMIT):
    """Compute and save per-window propagation statistics for a topic.

    The span between ``start_ts`` and ``over_ts`` (after ``over_ts`` has been
    passed through ``ts2HourlyTime``) is cut into consecutive windows of
    length ``during``.  For every window and every message type found in
    ``mtype_kv`` the weibo count, the top keywords and the top weibos are
    collected and handed to ``save_pc_results``, ``save_kc_results`` and
    ``save_ws_results``.

    Nothing is done when ``topic`` is empty; the function always returns
    ``None``.  ``save_fields`` is accepted but not used by this function.
    """
    if not topic:
        return

    range_start = int(start_ts)
    range_end = ts2HourlyTime(int(over_ts), during)
    window_total = (range_end - range_start) / during

    # steps_back is the number of windows between the current window's start
    # and range_end.
    for steps_back in range(window_total, 0, -1):
        begin_ts = range_end - during * steps_back
        end_ts = begin_ts + during
        progress = 'topic %s starts calculate' % topic.encode('utf-8')
        # One pre-formatted argument yields the same space-separated line.
        print('%s %s %s' % (begin_ts, end_ts, progress))

        counts_by_type = {}    # message type -> [end_ts, count]
        keywords_by_type = {}  # message type -> [end_ts, top keywords]
        weibos_by_type = {}    # message type -> [end_ts, top weibos]

        # The same query dict is reused for every message type; only its
        # 'message_type' entry is overwritten per iteration.
        query_dict = {'timestamp': {'$gt': begin_ts, '$lt': end_ts}}

        for _, msg_type in mtype_kv.iteritems():
            query_dict['message_type'] = msg_type

            count, results = xapian_search_weibo.search(
                query=query_dict, fields=fields_list)

            # Second search: sorted match set used for keyword extraction.
            sorted_mset = xapian_search_weibo.search(
                query=query_dict, sort_by=[sort_field],
                max_offset=w_limit, mset_direct=True)
            terms_iter = gen_mset_iter(xapian_search_weibo, sorted_mset,
                                       fields=['terms'])

            kcount = top_keywords(terms_iter, top=k_limit)
            top_ws = top_weibos(results, top=w_limit)

            counts_by_type[msg_type] = [end_ts, count]
            keywords_by_type[msg_type] = [end_ts, kcount]
            weibos_by_type[msg_type] = [end_ts, top_ws]

        save_pc_results(topic, counts_by_type, during)
        save_kc_results(topic, keywords_by_type, during, k_limit)
        save_ws_results(topic, weibos_by_type, during, w_limit)
Exemplo n.º 2
0
def propagateCronTopic(topic, xapian_search_weibo, start_ts, over_ts,
                       sort_field=SORT_FIELD, save_fields=RESP_ITER_KEYS,
                       during=Fifteenminutes, w_limit=TOP_WEIBOS_LIMIT,
                       k_limit=TOP_KEYWORDS_LIMIT):
    """Aggregate counts, keywords and top weibos of a topic window by window.

    ``over_ts`` is first normalised with ``ts2HourlyTime``; the range back to
    ``start_ts`` is then processed in slices of length ``during``.  Each slice
    is queried once per message type listed in ``mtype_kv`` and the three
    result mappings are written out through ``save_pc_results``,
    ``save_kc_results`` and ``save_ws_results``.

    An empty ``topic`` makes the call a no-op.  The ``save_fields`` argument
    is not referenced in the body.  Returns ``None``.
    """
    if topic and topic != '':
        first_ts = int(start_ts)
        last_ts = ts2HourlyTime(int(over_ts), during)
        n_windows = (last_ts - first_ts) / during

        # Start timestamps of all windows, in the order they are processed.
        window_starts = [last_ts - during * back
                         for back in range(n_windows, 0, -1)]

        for begin_ts in window_starts:
            end_ts = begin_ts + during
            print(' '.join([
                str(begin_ts),
                str(end_ts),
                'topic %s starts calculate' % topic.encode('utf-8'),
            ]))

            # Each mapping is keyed by message type and stores [end_ts, value].
            mtype_count, mtype_kcount, mtype_weibo = {}, {}, {}

            # Shared across message types; 'message_type' is replaced below.
            query_dict = {}
            query_dict['timestamp'] = {'$gt': begin_ts, '$lt': end_ts}

            for _, mtype in mtype_kv.iteritems():
                query_dict['message_type'] = mtype

                # Plain search: total count plus the result set for top weibos.
                count, results = xapian_search_weibo.search(
                    query=query_dict,
                    fields=fields_list,
                )

                # Sorted match set: source of the keyword statistics.
                mset = xapian_search_weibo.search(
                    query=query_dict,
                    sort_by=[sort_field],
                    max_offset=w_limit,
                    mset_direct=True,
                )
                kcount = top_keywords(
                    gen_mset_iter(xapian_search_weibo, mset, fields=['terms']),
                    top=k_limit,
                )
                top_ws = top_weibos(results, top=w_limit)

                mtype_count[mtype] = [end_ts, count]
                mtype_kcount[mtype] = [end_ts, kcount]
                mtype_weibo[mtype] = [end_ts, top_ws]

            save_pc_results(topic, mtype_count, during)
            save_kc_results(topic, mtype_kcount, during, k_limit)
            save_ws_results(topic, mtype_weibo, during, w_limit)
Exemplo n.º 3
0
def cityTopic(topic,
              start_ts,
              over_ts,
              during=Fifteenminutes,
              n_limit=TOP_WEIBOS_LIMIT):
    """Collect geo-tagged weibos and per-city counts for a topic.

    For every message type in ``mtype_kv`` the documents of index ``topic``
    with ``start_ts <= timestamp < over_ts`` are fetched from ``weibo_es``,
    ordered by ``SORT_FIELD`` descending.

    Returns ``None`` when ``topic`` is empty, otherwise a dict with two keys:

    * ``'geo_weibos'``: ``{message_type: [[province, city, hit], ...]}``.
      Only hits that are among the first ``n_limit`` results and whose
      ``geo`` field could be read are kept (an ``'unknown'`` province is
      still included here).
    * ``'geo_cityCount'``:
      ``{message_type: {province: {city: count, 'total': count}}}``, counted
      over all hits with a readable ``geo`` field whose province is not
      ``'unknown'``.

    Message types without any hit do not appear in ``'geo_cityCount'``.
    ``during`` is accepted but not used by this function.
    """
    if not topic:
        return None

    geo_cityTopic_results = {'geo_weibos': {}, 'geo_cityCount': {}}

    # v is the message type (repost, comment, original).
    for _, v in mtype_kv.iteritems():
        # Fetch the weibos of this message type inside the time range.
        # NOTE(review): Elasticsearch versions that enforce
        # index.max_result_window reject a size this large unless the index
        # setting is raised -- confirm against the cluster in use.
        query_body = {
            'query': {
                'bool': {
                    'must': [{
                        'term': {
                            'message_type': v
                        }
                    }, {
                        'range': {
                            'timestamp': {
                                'gte': start_ts,
                                'lt': over_ts
                            }
                        }
                    }]
                }
            },
            'sort': {
                SORT_FIELD: {
                    "order": "desc"
                }
            },
            'size': 10000000
        }
        mtype_weibo = weibo_es.search(index=topic,
                                      doc_type=weibo_index_type,
                                      body=query_body)['hits']['hits']
        if not mtype_weibo:
            continue

        # A fresh dict per message type.  Sharing one dict across the loop
        # made every 'geo_cityCount' entry alias the same object, so counts
        # of different message types were mixed together.
        province_dict = {}

        # rank is the 1-based position in the sorted result list; hits
        # without a usable geo field still consume a rank.
        for rank, weibo in enumerate(mtype_weibo, 1):
            try:
                geo = weibo['_source']['geo'].encode('utf8')
            except (KeyError, AttributeError, TypeError, UnicodeError):
                # Missing, null or undecodable geo: skip this hit.
                continue

            province, city = split_city(geo)

            if rank <= n_limit:
                geo_cityTopic_results['geo_weibos'].setdefault(v, []).append(
                    [province, city, weibo])

            if province != 'unknown':
                city_counts = province_dict.setdefault(province, {})
                city_counts[city] = city_counts.get(city, 0) + 1
                city_counts['total'] = city_counts.get('total', 0) + 1

        geo_cityTopic_results['geo_cityCount'][v] = province_dict

    return geo_cityTopic_results
Exemplo n.º 4
0
def cityTopic(topic,
              start_ts,
              over_ts,
              during=Fifteenminutes,
              n_limit=TOP_WEIBOS_LIMIT):
    """Count geo-tagged weibos per province/city, window by window.

    ``over_ts`` is normalised with ``ts2HourlyTime`` and the range back to
    ``start_ts`` is split into windows of length ``during``.  For every
    window and every message type in ``mtype_kv`` the top ``n_limit``
    documents (sorted by ``SORT_FIELD`` descending) with
    ``begin_ts <= timestamp < end_ts`` are fetched from ``weibo_es``.

    Each hit with a readable ``geo`` field and a province other than
    ``'unknown'`` is counted in
    ``{province: {city: count, 'total': count}}`` and passed to
    ``save_ws_results``.  After each message type that returned hits, the
    accumulated ``{message_type: [end_ts, counts]}`` mapping of the window
    is passed to ``save_rt_results`` together with the ``_source`` of the
    first hit.

    Nothing is done when ``topic`` is empty.  Returns ``None``.
    """
    if not topic:
        return

    start_ts = int(start_ts)
    over_ts = ts2HourlyTime(int(over_ts), during)
    interval = (over_ts - start_ts) / during

    for i in range(interval, 0, -1):
        # message type -> [end_ts, {province: {city: count, 'total': count}}]
        mtype_ccount = {}
        begin_ts = over_ts - during * i
        end_ts = begin_ts + during
        print('%s %s %s' % (begin_ts, end_ts, topic))
        first_item = {}

        # v is the message type (repost, comment, original).
        for _, v in mtype_kv.iteritems():
            # Fetch the weibos of this message type inside the window.
            query_body = {
                'query': {
                    'bool': {
                        'must': [{
                            'term': {
                                'message_type': v
                            }
                        }, {
                            'range': {
                                'timestamp': {
                                    'gte': begin_ts,
                                    'lt': end_ts
                                }
                            }
                        }]
                    }
                },
                'sort': {
                    SORT_FIELD: {
                        "order": "desc"
                    }
                },
                'size': n_limit
            }
            mtype_weibo = weibo_es.search(index=topic,
                                          doc_type=weibo_index_type,
                                          body=query_body)['hits']['hits']
            if not mtype_weibo:
                continue
            first_item = mtype_weibo[0]['_source']

            province_dict = {}
            for weibo in mtype_weibo:
                try:
                    geo = weibo['_source']['geo'].encode('utf8')
                except (KeyError, AttributeError, TypeError, UnicodeError):
                    # Missing, null or undecodable geo: skip this hit.
                    continue

                province, city = split_city(geo)
                if province == 'unknown':
                    continue

                # Update in place.  The previous try/except replaced the
                # whole province entry whenever a new city showed up, which
                # threw away the counts of the province's other cities and
                # reset its total to 1.
                city_counts = province_dict.setdefault(province, {})
                city_counts[city] = city_counts.get(city, 0) + 1
                city_counts['total'] = city_counts.get('total', 0) + 1

                save_ws_results(topic, end_ts, during, n_limit,
                                province, city, weibo)

            mtype_ccount[v] = [end_ts, province_dict]
            # NOTE(review): called once per message type (not once per
            # window), each time with everything accumulated so far in
            # mtype_ccount -- kept as in the original; confirm this is what
            # save_rt_results expects.
            save_rt_results(topic, mtype_ccount, during, first_item)
def _count_city_hit(geo_result, mtype, province, city):
    """Add one hit for ``city`` and for the province ``'total'`` under ``mtype``."""
    by_province = geo_result.get(mtype)
    if not isinstance(by_province, dict):
        # Missing or malformed entry: start this message type from scratch.
        by_province = geo_result[mtype] = {}

    by_city = by_province.get(province)
    if not isinstance(by_city, dict):
        by_city = by_province[province] = {}

    by_city[city] = by_city.get(city, 0) + 1
    by_city['total'] = by_city.get('total', 0) + 1


def cityTopic(topic,start_ts,over_ts,during=Fifteenminutes, n_limit=TOP_WEIBOS_LIMIT):
    """Update the stored per-city weibo counts of a topic and return them.

    The existing ``geo_results`` JSON of the topic is read from ``es_event``
    (index ``event_analysis_name``); if the field is missing or cannot be
    parsed into a dict, counting starts from an empty dict.

    ``over_ts`` is normalised with ``ts2HourlyTime`` and the range back to
    ``start_ts`` is split into windows of length ``during``.  For every
    window and every message type in ``mtype_kv`` the top ``n_limit``
    documents (sorted by ``SORT_FIELD`` descending) with
    ``begin_ts <= timestamp < end_ts`` are fetched from index ``topic``.
    Each hit with a readable ``geo`` field and a province other than
    ``'unknown'`` adds one to
    ``geo_result[message_type][province][city]`` and to
    ``geo_result[message_type][province]['total']``.

    The resulting dict is passed to ``save_rt_results_es`` and returned.
    Returns ``None`` without doing anything when ``topic`` is empty.
    """
    if not topic:
        return None

    start_ts = int(start_ts)
    over_ts = ts2HourlyTime(int(over_ts), during)
    interval = (over_ts - start_ts) / during

    item_exist = es_event.get(index=event_analysis_name,doc_type=event_type,id=topic)['_source']
    try:
        geo_result = json.loads(item_exist['geo_results'])
    except (KeyError, TypeError, ValueError):
        # No stored result yet, or it is not valid JSON text.
        geo_result = {}
    if not isinstance(geo_result, dict):
        geo_result = {}
    # NOTE(review): keys of a dict loaded from JSON are always strings.  If
    # the values of mtype_kv are not strings, counts from earlier runs and
    # the counts added below end up under different keys -- confirm.

    for i in range(interval, 0, -1):
        begin_ts = over_ts - during * i
        end_ts = begin_ts + during

        # v is the message type (repost, comment, original).
        for _, v in mtype_kv.iteritems():
            # Fetch the weibos of this message type inside the window.
            query_body = {
                'query': {
                    'bool': {
                        'must': [
                            {'term': {'message_type': v}},
                            {'range': {
                                'timestamp': {'gte': begin_ts, 'lt': end_ts}
                            }},
                        ]
                    }
                },
                'sort': {SORT_FIELD: {"order": "desc"}},
                'size': n_limit
            }
            mtype_weibo = es_event.search(index=topic,doc_type=event_text_type,body=query_body)['hits']['hits']
            if not mtype_weibo:
                continue

            for weibo in mtype_weibo:
                try:
                    geo = weibo['_source']['geo'].encode('utf8')
                except (KeyError, AttributeError, TypeError, UnicodeError):
                    # Missing, null or undecodable geo: skip this hit.
                    continue

                province, city = split_city(geo)
                if province != 'unknown':
                    _count_city_hit(geo_result, v, province, city)

    save_rt_results_es(topic, geo_result)

    return geo_result