Пример #1
0
def get_maker_weibos_sort_es(topic, identifyDate, identifyWindow, sort_item):
    """Collect every maker's weibos for an event, deduplicate them, and
    return the list sorted by ``sort_item`` (descending)."""
    query_body = {'query': {'term': {'en_name': topic}}}

    hits = es.search(index=index_event_analysis_results,
                     doc_type=type_event_analysis_results,
                     body=query_body)['hits']['hits']

    collected = []
    for hit in hits:
        makers = json.loads(hit['_source']['network_results'])['maker_results']

        for uid, maker_info in makers.iteritems():
            profile = json.loads(maker_info['user_info'])

            for weibo in json.loads(maker_info['weibo_info']):
                # enrich each weibo with the maker's display name and avatar
                weibo['_source']['uname'] = profile['name']
                weibo['_source']['photo_url'] = profile['profile_image_url']
                if weibo not in collected:
                    collected.append(weibo)

    return sorted(collected,
                  key=lambda w: w['_source'][sort_item],
                  reverse=True)
Пример #2
0
def ajax_show_all_task():
    """Return (as JSON) the newest 1000 interfere tasks, most recent first."""
    query_body = {
        "query": {"match_all": {}},
        "sort": {"submit_time": {"order": "desc"}},
        "size": 1000,
    }

    hits = es_prediction.search(index=index_manage_interfere_task,
                                doc_type=type_manage_interfere_task,
                                body=query_body)["hits"]["hits"]

    return json.dumps([hit["_source"] for hit in hits])
Пример #3
0
def get_gexf_es(topic, identifyDate, identifyWindow):
    """Fetch the stored 'long_gexf' payload for an event and return one gexf string.

    NOTE(review): both loops below overwrite their target every iteration, so
    only the last ES hit and the last entry of `gexf_results` survive --
    presumably each event has exactly one result document; confirm against
    the index.  `result` (and `gexf_results`) are unbound if no hits match.
    """
    #key = _utf8_unicode(topic) +'_' + str(identifyDate) + '_' + str(identifyWindow) + '_' + 'source_graph'
    #key = str(key)

    #gexf2es(key, value)
    print 'topic:::', topic
    query_body = {'query': {'term': {'en_name': topic}}}

    network_results = es.search(index=index_event_analysis_results,
                                doc_type=type_event_analysis_results,
                                body=query_body)['hits']['hits']

    for network_result in network_results:

        # unpack the JSON-encoded network analysis stored on this hit
        network_result = network_result['_source']['network_results']

        network_result = json.loads(network_result)

        gexf_results = network_result['long_gexf']

    for key, value in gexf_results.iteritems():
        #print value.keys()
        #print value['date']
        #print value['window']
        #print type(value)
        #print key
        result = value['gexf']
        print type(result)

    #result = read_long_gexf(topic, identifyDate, identifyWindow)

#result = gexf_process(result)
#print result
    return result
Пример #4
0
def ajax_show_analysis_task():
    current_time = time.time()
    query_body = {
        "query": {
            "range": {
                "submit_time": {
                    "gte": current_time - 20 * 24 * 3600
                }
            }
        },
        "size": 1000,
        "sort": {
            "submit_time": {
                "order": "desc"
            }
        }
    }

    es_results = es_prediction.search(index=index_manage_event_analysis,doc_type=\
            type_manage_event_analysis, body=query_body)["hits"]["hits"]

    results = []
    for item in es_results:
        results.append(item["_source"])
    print '173::::::::::::::', results
    return json.dumps(results)
Пример #5
0
def get_sen_province_count_es_final(en_name,
                                    start_ts,
                                    end_ts,
                                    unit=MinInterval):

    sen_geo_results_dict = {}
    sen_geo_count_results_final = []

    query_body = {'query': {'term': {'en_name': en_name}}}

    es_results = es.search(index=index_event_analysis_results,doc_type=type_event_analysis_results,\
                            body=query_body)['hits']['hits']

    for es_result in es_results:
        sen_results = es_result['_source']['sentiment_results']
        sen_geo_results_dict = json.loads(sen_results)

    during = sen_geo_results_dict['during']
    geo_counts = sen_geo_results_dict['geo_count']
    print 'geo_counts:::::::::::::', geo_counts

    for sen, geo_list in geo_counts.iteritems():
        #print 'type:::::geo_list',geo_list
        sen_geo_count_results = {}
        sen_geo_count_results[sen] = []
        if geo_list:
            for province, city_dict in geo_list.iteritems():
                sen_geo_count_results[sen].append([province, city_dict])
        else:
            continue
        sen_geo_count_results_final.append(sen_geo_count_results)
    #print 'sen_geo_count_results_final::::::::::::::::::',sen_geo_count_results_final
    return sen_geo_count_results_final
Пример #6
0
def ajax_show_all_task():
    """Return (as JSON) the newest 1000 event-analysis tasks, most recent first."""
    query_body = {
        "query": {"match_all": {}},
        "sort": {"submit_time": {"order": "desc"}},
        "size": 1000,
    }

    hits = es.search(index=index_manage_event_analysis,
                     doc_type=type_manage_event_analysis,
                     body=query_body)["hits"]["hits"]

    return json.dumps([hit['_source'] for hit in hits])
Пример #7
0
def get_time_count_es(en_name, start_ts, end_ts, unit=MinInterval):
    """Bucket an event's per-mtype counts into unit-sized time slots.

    Reads the stored 'time_results' payload for *en_name* and returns
    {bucket_end_ts: {mtype: count}} covering [start_ts, end_ts].
    """
    mtype_count_results = {}
    time_results_dict = {}

    query_body = {'query': {'term': {'en_name': en_name}}}

    es_results = es.search(index=index_event_analysis_results,
                           doc_type=type_event_analysis_results,
                           body=query_body)['hits']['hits']

    # keep the time payload of the last hit (one result doc per event)
    for es_result in es_results:
        time_results_dict = json.loads(es_result['_source']['time_results'])

    during = time_results_dict['during']
    counts = time_results_dict['count']

    if end_ts - start_ts < unit:
        # window shorter than one unit: a single bucket at the rounded-up end
        upbound = long(math.ceil(end_ts / (unit * 1.0)) * unit)

        mtype_count_results[upbound] = {}

        for key, value in counts.iteritems():
            # BUG FIX: JSON object keys are strings, so the original
            # `key == upbound` compared str to long and never matched;
            # cast first (the else-branch below already did this).
            if int(key) == upbound:
                for k, v in value.iteritems():
                    try:
                        mtype_count_results[upbound][k] += v
                    except KeyError:
                        mtype_count_results[upbound][k] = v

    else:
        upbound = long(math.ceil(end_ts / (unit * 1.0)) * unit)
        lowbound = long((start_ts / unit) * unit)
        interval = (upbound - lowbound) / unit
        for i in range(interval, 0, -1):
            begin_ts = upbound - unit * i
            end_ts = begin_ts + unit
            mtype_count_results[end_ts] = {}

            for key, value in counts.iteritems():
                key = int(key)
                if key > begin_ts and key <= end_ts:
                    for k, v in value.iteritems():
                        try:
                            mtype_count_results[end_ts][k] += v
                        except KeyError:
                            mtype_count_results[end_ts][k] = v

    return mtype_count_results
Пример #8
0
def get_symbol_weibo(topic,start_ts,end_ts,unit=MinInterval):  # fishbone diagram
    """Pick up to 3 representative titled weibos per cluster for *topic*.

    Returns {first_feature_word: [weibo_dict, ...]}.  Only weibos whose
    content carries a 【title】 segment are kept, deduplicated by title
    within a cluster.

    NOTE(review): assumes the topics-river query matches at least one
    document -- hits[0] raises IndexError otherwise.
    """

    weibos = {}
    
    # one document whose [start_ts, end_ts] window covers the request
    query_body = {
        'query':{
            'bool':{
                'must':[
                    {'term':{'name':topic}},
                    {'range':{'start_ts':{'lte':start_ts}}},
                    {'range':{'end_ts':{'gte':end_ts}}}
                ]
            }
        }
    }
    
    '''
    query_body = {
        'query':{
            'match_all':{}
        }
    } 
    '''
    print query_body
    symbol = es.search(index=topics_river_index_name,doc_type=topics_river_index_type,body=query_body)['hits']['hits'][0]['_source']
    #symbol = es.search(index=topics_river_index_name,doc_type=topics_river_index_type,body=query_body)['hits']['hits']
    print 'symbol:::',symbol
    features = json.loads(symbol['features'])
    symbol_weibos = json.loads(symbol['cluster_dump_dict'])
    #print symbol_weibos
    begin_ts = end_ts - unit
    for clusterid,contents in symbol_weibos.iteritems():
        j = 0  # weibos kept so far for this cluster (capped at 3)
        content = set()  # titles already seen within this cluster
        for i in contents:

            ts = full_datetime2ts(i['datetime'])
        
            # the weibo "title" is its 【...】 segment, if any
            title = re.findall(r'【.*】',i['content'].encode('utf8'))
            if title:
                title = title[0]

                print 'title::',title.encode('utf-8')
                if ts >= start_ts and ts <= end_ts and title not in content:  # start_ts should be begin_ts; the last 15 minutes currently have no data, so the whole range is used
                    try:
                        weibos[features[clusterid][0]].append(i)
                    except:
                        weibos[features[clusterid][0]] = [i]
                    content.add(title)
                    j += 1
                #print content
                if j == 3:
                    break
            else:
                continue
    #print weibos
    return weibos
Пример #9
0
def ajax_show_analysis_task():
    """Return analysis and prediction tasks from the last 20 days (as JSON),
    deduplicated by pinyin task name (analysis tasks take precedence)."""
    query_body = {
        "query": {
            "range": {"submit_time": {"gte": time.time() - 20 * 24 * 3600}}
        },
        "size": 1000,
        "sort": {"submit_time": {"order": "desc"}},
    }

    seen = set()
    analysis_results = []
    for item in es_prediction.search(index=index_manage_event_analysis,
                                     doc_type=type_manage_event_analysis,
                                     body=query_body)["hits"]["hits"]:
        source = item["_source"]
        seen.add(source["pinyin_task_name"])
        analysis_results.append(source)

    prediction_results = []
    for item in es_prediction.search(index=index_manage_prediction_task,
                                     doc_type=type_manage_prediction_task,
                                     body=query_body)["hits"]["hits"]:
        source = item["_source"]
        name = source["pinyin_task_name"]
        # keep a prediction task only if no analysis task shares its name
        if name not in seen:
            prediction_results.append(source)
            seen.add(name)

    return json.dumps({
        "event_analysis_task": analysis_results,
        "event_prediction_task": prediction_results,
    })
Пример #10
0
def province_weibo_count_es(topic, start_ts, end_ts, unit=MinInterval):

    geo_count_results = {}
    geo_results_dict = {}

    query_body = {'query': {'term': {'en_name': topic}}}

    es_results = es.search(index=index_event_analysis_results,doc_type=type_event_analysis_results,\
                            body=query_body)['hits']['hits']
    print 'len:::::::::', len(es_results)
    for es_result in es_results:
        geo_results = es_result['_source']['geo_results']
        geo_results_dict = json.loads(geo_results)

        #sort_ts_attr = geo_results_dict['sort_ts_attr']
        #repost_list = geo_results_dict['repost_list']
        geo_cityTopic_results = geo_results_dict['geo_cityTopic_results']
        geo_cityCount = geo_cityTopic_results['geo_cityCount']

    #print 'geo_cityTopic_results[]:::::::::::',geo_cityTopic_results['geo_cityCount']
    '''
    if (end_ts - start_ts < unit):
        upbound = long(math.ceil(end_ts / (unit * 1.0)) * unit)

        mtype_count_results[upbound] = {}

        for key,value in counts.iteritems():
            if key == upbound:
                for k,v in value.iteritems():
                    try:
                        mtype_count_results[upbound][k] += v
                    except:
                        mtype_count_results[upbound][k] = v

    else:
        upbound = long(math.ceil(end_ts / (unit * 1.0)) * unit)
        lowbound = long((start_ts / unit) * unit)
        interval = (upbound-lowbound)/unit
        for i in range(interval, 0, -1):    
            begin_ts = upbound - unit * i
            end_ts = begin_ts + unit
            mtype_count_results[end_ts] = {}
            
            for key,value in counts.iteritems():
                if key > begin_ts and key <= end_ts:
                    for k,v in value.iteritems():
                        try:
                            mtype_count_results[end_ts][k] += v
                        except:
                            mtype_count_results[end_ts][k] = v
    '''
    #return  geo_count_results
    return geo_cityCount
Пример #11
0
def get_predict_count(task_name, start_ts, end_ts):
    query_body = {
        "query": {
            "range": {
                "update_time": {
                    "gte": start_ts,
                    "lte": end_ts
                }
            }
        },
        "sort": {
            "update_time": {
                "order": "asc"
            }
        },
        "size": 100000
    }

    index_name = 'micro_prediction_' + task_name

    results = es_prediction.search(index=index_name,
                                   doc_type="micro_task",
                                   body=query_body)["hits"]["hits"]
    return_list = []
    truth_value_list = []
    prediction_value_list = []
    ts_list = []
    for item in results:
        truth_value = item["_source"]["total_count"]
        truth_value_list.append(truth_value)
        try:
            prediction_value = item["_source"]["prediction_value"]
            prediction_value_list.append(prediction_value)
        except:
            pass
        ts = item["_source"]["update_time"]
        ts_list.append(ts)
    print len(truth_value_list), len(ts_list), len(prediction_value_list)

    prediction_value_list.insert(0, truth_value_list[0])
    for i in range(len(ts_list)):
        return_list.append(
            [ts_list[i], truth_value_list[i], prediction_value_list[i]])

    # final
    return_list.append(
        [ts_list[-1] + minimal_time_interval, 0, prediction_value_list[-1]])

    return json.dumps(return_list)
Пример #12
0
def get_macro_prediction_count(task_name):
    query_body = {'query': {'term': {'event': task_name}}}
    print 'task_name::::::', task_name
    es_results = es_prediction.search(index=index_macro_feature_result,doc_type=type_macro_feature_result,\
                body=query_body)['hits']['hits']
    weibo_count = 0
    user_count = 0
    rank = 0.0
    print 'es::::::::::::', es_results
    for es_result in es_results:
        weibo_count = es_result['_source']['predict_weibo_value']
        user_count = es_result['_source']['predict_user_value']
        rank = es_result['_source']['predict_rank']

    return weibo_count, user_count, rank
Пример #13
0
def get_trend_maker_es(topic, identifyDate, identifyWindow):
    """Return the event's maker_results with each maker's 'user_info'
    JSON-decoded in place."""
    query_body = {'query': {'term': {'en_name': topic}}}

    hits = es.search(index=index_event_analysis_results,
                     doc_type=type_event_analysis_results,
                     body=query_body)['hits']['hits']

    # the last hit wins (one result doc per event expected)
    for hit in hits:
        maker_results = json.loads(hit['_source']['network_results'])['maker_results']
        for uid, info_dict in maker_results.iteritems():
            info_dict['user_info'] = json.loads(info_dict['user_info'])

    return maker_results
def get_task_detail_2():
    """Return the most recent 7*24 sensing points as parallel lists:
    timestamps plus origin / retweeted / total weibo counts, oldest first."""
    query_body = {
        "query": {"match_all": {}},
        "size": 7 * 24,
        "sort": {"timestamp": {"order": "desc"}},
    }

    flow_detail = es_prediction.search(index="social_sensing_task",
                                       doc_type="social_sensing",
                                       body=query_body)["hits"]["hits"]
    flow_detail.reverse()  # chronological order

    time_series = []
    origin_weibo_list = []
    retweeted_weibo_list = []
    all_weibo_list = []
    for hit in flow_detail:
        source = hit['_source']
        time_series.append(source['timestamp'])
        origin = source["origin_weibo_number"]
        retweeted = source['retweeted_weibo_number']
        origin_weibo_list.append(origin)
        retweeted_weibo_list.append(retweeted)
        all_weibo_list.append(origin + retweeted)

    return {
        'time_series': time_series,
        'all_weibo_list': all_weibo_list,
        'origin_weibo_list': origin_weibo_list,
        'retweeted_weibo_list': retweeted_weibo_list,
    }
Пример #15
0
def get_subopinion(topic):
    query_body = {
        'query':{
            'filtered':{
                'filter':{
                    'term':{
                        'name':topic
                    }
                }
            }
        }
    }
    features = es.search(index=subopinion_index_name,doc_type=subopinion_index_type,body=query_body)['hits']['hits']
    print 'features::::::',features[0]
    print 'features_keys::::::',features[0]['_source'].keys()
    if features:
        feature = json.loads(features[0]['_source']['features'])
        print 'feature.values()::::::::::::::',feature.values()
        return feature.values()
    else:
        return 'no results'
Пример #16
0
def ajax_show_task():
    """Return (as JSON) up to 10000 prediction tasks, newest submissions first."""
    query_body = {
        "query": {"match_all": {}},
        "size": 10000,
        "sort": {"submit_time": {"order": "desc"}},
    }

    hits = es_prediction.search(index=index_manage_prediction_task,
                                doc_type=type_manage_prediction_task,
                                body=query_body)["hits"]["hits"]

    return json.dumps([hit["_source"] for hit in hits])
Пример #17
0
def get_topics_river(topic,start_ts,end_ts,unit=MinInterval):  # topic river
    """Build the topic river: {first keyword of cluster: time-bucketed counts}.

    NOTE: assumes the query matches at least one river document (hits[0]).
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'name': topic}},
                    {'range': {'start_ts': {'lte': start_ts}}},
                    {'range': {'end_ts': {'gte': end_ts}}}
                ]
            }
        }
    }
    hit = es.search(index=topics_river_index_name,
                    doc_type=topics_river_index_type,
                    body=query_body)['hits']['hits'][0]
    news_topics = json.loads(hit['_source']['features'])
    zhutihe_results = cul_key_weibo_time_count(topic, news_topics,
                                               start_ts, end_ts, unit)

    results = {}
    for cluster_id, keywords in news_topics.iteritems():
        if keywords:
            results[keywords[0]] = zhutihe_results[cluster_id]
    return results
Пример #18
0
def get_during_keywords_es(topic, start_ts, end_ts):  # keyword cloud
    """Return (as JSON) the most frequent 2-gram keywords extracted from
    weibo text posted in [start_ts, end_ts)."""
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'range': {
                        'timestamp': {'gte': start_ts, 'lt': end_ts}
                    }
                }
            }
        },
        'size': MAX_LANGUAGE_WEIBO
    }

    hits = es.search(index=topic, doc_type='text',
                     body=query_body)['hits']['hits']
    weibo_text = [hit['_source']['text'].encode('utf-8') for hit in hits]

    # extract 2-gram keyword frequencies from the raw texts
    keywords_dict = get_weibo(weibo_text, n_gram=2, n_count=100)

    word_results = sorted(keywords_dict.iteritems(),
                          key=lambda item: item[1],
                          reverse=True)[:MAX_FREQUENT_WORDS]
    return json.dumps(word_results)
Пример #19
0
def cul_key_weibo_time_count(topic, news_topics, start_ts, over_ts, during):
    """Count, per cluster, how many weibos match the cluster's keywords in
    each Day-sized time bucket.

    Returns {clusterid: [(datetime_str, count), ...]} sorted by bucket time.
    NOTE: `during` is overridden to Day below, preserving the original
    behavior regardless of the caller's value.
    """
    key_weibo_time_count = {}
    during = Day  # fixed bucket size
    for clusterid, keywords in news_topics.iteritems():
        if len(keywords) > 0:
            # BUG FIX: time_dict was created once outside this loop in the
            # original, leaking buckets from one cluster into the next;
            # reset it per cluster.
            time_dict = {}

            start = int(start_ts)
            over = ts2HourlyTime(int(over_ts), during)
            interval = (over - start) / during

            for i in range(interval, 0, -1):
                begin_ts = over - during * i
                end_ts = begin_ts + during

                # bucket range plus "any keyword matches" clause
                must_list = [
                    {'range': {'timestamp': {'gte': begin_ts, 'lt': end_ts}}}
                ]
                should = [{'wildcard': {'keywords_string': '*' + word + '*'}}
                          for word in keywords]
                must_list.append({'bool': {'should': should}})

                query_body = {'query': {'bool': {'must': must_list}}}
                key_weibo = es.search(index=topic, doc_type='text',
                                      body=query_body)
                # total matching weibos in this bucket
                time_dict[ts2datetime(end_ts)] = key_weibo['hits']['total']

            key_weibo_time_count[clusterid] = sorted(time_dict.items(),
                                                     key=lambda x: x[0])
    return key_weibo_time_count
Пример #20
0
def get_weibo_content_es(topic,
                         start_ts,
                         end_ts,
                         province,
                         sort_item='timestamp',
                         unit=Fifteenminutes):
    """Collect the geo-tagged weibos of *topic* posted in *province*,
    enriched with user profile data, sorted by ``sort_item`` descending."""
    weibo_dict = {}

    query_body = {'query': {'term': {'en_name': topic}}}

    es_results = es.search(index=index_event_analysis_results,
                           doc_type=type_event_analysis_results,
                           body=query_body)['hits']['hits']

    # keep the geo payload of the last hit (one result doc per event)
    for es_result in es_results:
        geo_results_dict = json.loads(es_result['_source']['geo_results'])
        geo_weibos = geo_results_dict['geo_cityTopic_results']['geo_weibos']

    for weibo_type, geo_weibo_list in geo_weibos.iteritems():
        for entry in geo_weibo_list:
            # entry layout: [province, city, weibo]
            if entry[0] != province:
                continue
            weibo = entry[2]
            source = weibo['_source']
            weibo_content = {
                'text': source['text'],
                'uid': source['uid'],
                'timestamp': source['timestamp'],
                'sentiment': source['sentiment'],
                'comment': source['comment'],
                'retweeted': source['retweeted'],
                'keywords': source['keywords_dict'],
                'mid': source['mid'],
            }
            try:
                user = es_user_portrait.get(
                    index=profile_index_name,
                    doc_type=profile_index_type,
                    id=weibo_content['uid'])['_source']
                weibo_content['uname'] = user['nick_name']
                weibo_content['photo_url'] = user['photo_url']
            except:
                # portrait lookup is best-effort
                weibo_content['uname'] = 'unknown'
                weibo_content['photo_url'] = 'unknown'
            weibo_dict[weibo_content['mid']] = weibo_content

    try:
        results = sorted(weibo_dict.items(),
                         key=lambda x: x[1][sort_item],
                         reverse=True)
    except:
        results = []
    return results
Пример #21
0
def get_weibo_by_time_es(topic, start_ts, end_ts, sort_item='timestamp'):
    """Fetch up to 200 weibos of *topic* in [start_ts, end_ts], enrich them
    with user profile data, and return them sorted by ``sort_item`` desc."""
    query_body = {
        'query': {
            'bool': {
                'must': [{
                    'range': {
                        'timestamp': {
                            'lte': int(end_ts),
                            'gte': int(start_ts)
                        }
                    }
                }]
            }
        },
        'size': 200,
        'sort': {sort_item: {'order': 'desc'}}
    }
    items = es.search(index=topic, body=query_body)['hits']['hits']

    if not items:
        return []

    weibo_dict = {}
    for item in items:
        weibo = item['_source']
        weibo_content = {
            'text': weibo['text'],
            'uid': weibo['uid'],
            'timestamp': weibo['timestamp'],
            'mid': weibo['mid'],
            # counters may be absent on some documents
            'comment': weibo.get('comment', 0),
            'retweeted': weibo.get('retweeted', 0),
        }
        try:
            user = es_user_portrait.get(index=profile_index_name,
                                        doc_type=profile_index_type,
                                        id=weibo_content['uid'])['_source']
            weibo_content['uname'] = user['nick_name']
            weibo_content['photo_url'] = user['photo_url']
        except:
            # portrait lookup is best-effort
            weibo_content['uname'] = 'unknown'
            weibo_content['photo_url'] = 'unknown'
        weibo_dict[weibo_content['mid']] = weibo_content

    return sorted(weibo_dict.items(),
                  key=lambda entry: entry[1][sort_item],
                  reverse=True)
Пример #22
0
def get_sen_province_count_es(en_name, start_ts, end_ts, unit=MinInterval):
    """Aggregate sentiment geo counts across every stored time bucket.

    Returns {sentiment: {province: {city_or_'total': count}}} summed over
    all time keys of the event's 'geo_count' payload.

    NOTE(review): the nested try/except ladders below create missing dict
    levels on demand; their exact order is load-bearing, so the code is
    documented rather than restructured.
    """

    sen_geo_count_results = {}
    sen_geo_results_dict = {}

    query_body = {'query': {'term': {'en_name': en_name}}}

    es_results = es.search(index=index_event_analysis_results,doc_type=type_event_analysis_results,\
                            body=query_body)['hits']['hits']

    # keep the sentiment payload of the last hit (one result doc per event)
    for es_result in es_results:
        sen_results = es_result['_source']['sentiment_results']
        sen_geo_results_dict = json.loads(sen_results)

    during = sen_geo_results_dict['during']
    geo_counts = sen_geo_results_dict['geo_count']
    '''
    for i_sen in [0,1,2,3,4,5,6,7]:
        sen_geo_count_results[i_sen] = {}
    '''

    for end_ts, sen_geo_dict in geo_counts.iteritems():
        for sen, geo_list in sen_geo_dict.iteritems():
            print 'type:::::geo_list', geo_list
            if geo_list:
                for province, city_dict in geo_list.iteritems():
                    for k, v in city_dict.iteritems():

                        if k == 'total':
                            # accumulate the province total, creating each
                            # missing nesting level on demand
                            try:
                                sen_geo_count_results[sen][province][
                                    'total'] += v
                            except:
                                try:
                                    sen_geo_count_results[sen][province][
                                        'total'] = v
                                except:

                                    #sen_geo_count_results[sen] = {province:{'total':v}}
                                    try:
                                        sen_geo_count_results[sen][
                                            province] = {
                                                'total': v
                                            }
                                    except:
                                        sen_geo_count_results[sen] = {
                                            province: {
                                                'total': v
                                            }
                                        }

                        else:
                            # per-city count, same create-on-demand ladder
                            try:
                                sen_geo_count_results[sen][province][k] += v
                            except:
                                #sen_geo_count_results[sen][province][k] = v

                                try:
                                    sen_geo_count_results[sen][province][k] = v
                                except:
                                    #sen_geo_count_results[sen] = {province:{k:v}}
                                    try:
                                        sen_geo_count_results[sen][
                                            province] = {
                                                k: v
                                            }
                                    except:
                                        sen_geo_count_results[sen] = {
                                            province: {
                                                k: v
                                            }
                                        }
                        '''
                        if k == 'total':
                            continue
                        try:
                            sen_geo_count_results[sen][province]['total'] += v
                        except:
                            try:
                                sen_geo_count_results[sen][province] = {'total':v}
                            except:
                                sen_geo_count_results[sen] = {province:{'total':v}}

                        try:
                            sen_geo_count_results[sen][province][k] += v
                        except:
                            #sen_geo_count_results[sen][province][k] = v
                            
                            try:
                                sen_geo_count_results[sen][province] = {k:v}
                            except:
                                sen_geo_count_results[sen] = {province:{k:v}}
                        '''

            else:
                continue

    #print 'sen_geo_count_results.keys():',sen_geo_count_results.keys()
    '''
    for key in sen_geo_count_results.keys():

        for k in sen_geo_count_results[key].keys():
            print sen_geo_count_results[key][k].keys()
    '''
    #print 'sen_geo_count_results:::::::::::::::::::::::',sen_geo_count_results
    return sen_geo_count_results
    '''
Пример #23
0
def get_weibo_content_es(topic, begin_ts, end_ts, sen, sort_item='timestamp'):
    """Fetch weibos of one sentiment for a topic within [begin_ts, end_ts).

    Queries the per-topic ES index (doc type 'text') for posts whose
    'sentiment' equals ``sen`` and whose 'timestamp' falls in the half-open
    window, enriches each hit with the author's nickname and photo from the
    user-portrait index, and returns the posts sorted by ``sort_item``
    in descending order.

    Returns a list of ``(mid, weibo_content_dict)`` tuples; empty list when
    nothing matches.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    # one topic, one sentiment, bounded time window
                    {'term': {'sentiment': sen}},
                    {'range': {'timestamp': {'gte': begin_ts, 'lt': end_ts}}}
                ]
            }
        },
        'sort': {sort_item: {"order": "desc"}}
    }
    items = es.search(index=topic, doc_type='text',
                      body=query_body)['hits']['hits']

    weibo_dict = {}
    for item in items:
        weibo = item['_source']
        weibo_content = {
            'text': weibo['text'],
            'uid': weibo['uid'],
            'timestamp': weibo['timestamp'],
            # engagement counters may be absent on older documents;
            # default them to 0 instead of failing
            'comment': weibo.get('comment', 0),
            'retweeted': weibo.get('retweeted', 0),
            'mid': weibo['mid'],
        }
        try:
            user = es_user_portrait.get(index=profile_index_name,
                                        doc_type=profile_index_type,
                                        id=weibo_content['uid'])['_source']
            weibo_content['uname'] = user['nick_name']
            weibo_content['photo_url'] = user['photo_url']
        except Exception:
            # author not in the portrait index (or lookup failed) --
            # keep the weibo with placeholder author info
            weibo_content['uname'] = 'unknown'
            weibo_content['photo_url'] = 'unknown'
        # keyed by mid so duplicate hits collapse to a single entry
        weibo_dict[weibo_content['mid']] = weibo_content

    # re-sort locally: dict iteration order does not preserve the ES sort
    return sorted(weibo_dict.items(),
                  key=lambda x: x[1][sort_item],
                  reverse=True)
Пример #24
0
def get_weibo_content(topic, start_ts, end_ts, opinion, sort_item='timestamp'):
    """Fetch representative weibos for a sub-opinion of a topic.

    Looks up sub-opinion documents whose 'keys' phrase-match ``opinion``,
    whose 'name' equals ``topic``, and whose [start_ts, end_ts] window lies
    inside the requested one. Takes the first matching document's cluster
    dump and returns one representative weibo per cluster, enriched with
    author info from the user-portrait index and sorted by ``sort_item``
    in descending order.

    Returns a list of ``(mid, weibo_content_dict)`` tuples, or the string
    'no results' when no sub-opinion document matches (sentinel kept for
    backward compatibility with existing callers).
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'match_phrase': {'keys': opinion}},
                    {'term': {'name': topic}},
                    {'range': {'start_ts': {'gte': start_ts}}},
                    {'range': {'end_ts': {'lte': end_ts}}}
                ]
            }
        },
        'size': 1000000
    }
    weibos = es.search(index=subopinion_index_name,
                       doc_type=subopinion_index_type,
                       body=query_body)['hits']['hits']
    if not weibos:
        return 'no results'

    # only the first matching document's clusters are used
    clusters = json.loads(weibos[0]['_source']['cluster_dump_dict'])

    weibo_dict = {}
    for cluster_weibos in clusters.values():
        # one representative weibo per cluster: the first entry
        weibo = cluster_weibos[0]
        weibo_content = {
            'text': weibo['text'],
            'uid': weibo['uid'],
            'timestamp': full_datetime2ts(weibo['datetime']),
            'comment': weibo['comment'],
            'retweeted': weibo['retweeted'],
            'mid': weibo['id'],
        }
        try:
            user = es.get(index=profile_index_name,
                          doc_type=profile_index_type,
                          id=weibo_content['uid'])['_source']
            weibo_content['uname'] = user['nick_name']
            weibo_content['photo_url'] = user['photo_url']
        except Exception:
            # author not found in the portrait index -- use placeholders
            weibo_content['uname'] = 'unknown'
            weibo_content['photo_url'] = 'unknown'
        weibo_dict[weibo_content['mid']] = weibo_content

    return sorted(weibo_dict.items(),
                  key=lambda x: x[1][sort_item],
                  reverse=True)