Пример #1
0
def get_subopinion_new(topic, start_ts, end_ts, sort_item='timestamp'):
    """Return the sub-opinion entries stored for ``topic`` with their weibos.

    The document whose id is ``topic`` is fetched from
    ``subopinion_index_name`` (doc_type ``"text"``).  Every field of its
    ``_source`` except ``start_ts`` and ``end_ts`` is treated as one
    sub-opinion entry carrying a ``keys`` string and a JSON-encoded
    ``cluster_dump_dict``.

    Args:
        topic: Id of the document to fetch.
        start_ts: Not used by this function; kept for caller compatibility.
        end_ts: Not used by this function; kept for caller compatibility.
        sort_item: Key of the per-weibo dict to sort by (descending).

    Returns:
        A list with one dict per sub-opinion entry::

            {'keys': get_mode(<keys split on '_'>),
             'weibos': [(mid, weibo_content), ...]}

        where ``weibo_content`` has the keys ``text``, ``uid``,
        ``timestamp``, ``comment``, ``retweeted``, ``mid``, ``uname`` and
        ``photo_url``.
    """
    results = []
    # Debug output kept from the original; single-argument print() behaves
    # the same under Python 2 and Python 3.
    print(topic)
    source = weibo_es.get(index=subopinion_index_name,
                          id=topic,
                          doc_type="text")['_source']
    print(source)

    for field, subopinion in source.items():
        # start_ts / end_ts are skipped: they are not sub-opinion entries.
        if field in ('start_ts', 'end_ts'):
            continue

        result = {'keys': get_mode(subopinion['keys'].split('_'))}
        weibo_dict = {}

        clusters = json.loads(subopinion['cluster_dump_dict'])
        for cluster in clusters.values():
            if not cluster:
                # Nothing to show for an empty cluster; indexing [0] would
                # raise IndexError.
                continue
            # Only the first weibo of each cluster is used.
            first = cluster[0]
            weibo_content = {
                'text': first['text'],
                'uid': first['uid'],
                'timestamp': full_datetime2ts(first['datetime']),
                'comment': first['comment'],
                'retweeted': first['retweeted'],
                'mid': first['id'],
            }
            # Best-effort profile lookup: any failure falls back to
            # 'unknown', but KeyboardInterrupt/SystemExit still propagate.
            try:
                user = es_user_profile.get(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           id=weibo_content['uid'])['_source']
                uname, photo_url = user['nick_name'], user['photo_url']
            except Exception:
                uname = photo_url = 'unknown'
            weibo_content['uname'] = uname
            weibo_content['photo_url'] = photo_url
            weibo_dict[weibo_content['mid']] = weibo_content

        result['weibos'] = sorted(weibo_dict.items(),
                                  key=lambda item: item[1][sort_item],
                                  reverse=True)
        results.append(result)

    return results
Пример #2
0
def get_symbol_weibo(topic, start_ts, end_ts, unit=MinInterval):
    """Collect up to three representative weibos per cluster (fishbone chart).

    Searches ``topics_river_index_name`` for a document whose ``name``
    phrase-matches ``topic`` and reads its JSON-encoded ``features`` and
    ``cluster_dump_dict`` fields from the first hit.

    Args:
        topic: Topic name to search for.
        start_ts: Lower bound (inclusive) of the accepted weibo timestamp.
        end_ts: Upper bound (inclusive) of the accepted weibo timestamp.
        unit: Currently unused.  The original author's note says the window
            should eventually start at ``end_ts - unit`` instead of
            ``start_ts``; the full range is used because the most recent
            15 minutes had no data.

    Returns:
        A dict mapping the comma-joined first five features of a cluster to
        a list of its selected weibos, or ``0`` when the search fails or
        yields no hit (historical return value, kept for callers).
    """
    query_body = {"query": {"match_phrase": {"name": topic}}}

    try:
        symbol = weibo_es.search(index=topics_river_index_name,
                                 doc_type=topics_river_index_type,
                                 body=query_body)['hits']['hits'][0]['_source']
    except Exception:
        # Search error or empty hit list.
        return 0

    features = json.loads(symbol['features'])
    symbol_weibos = json.loads(symbol['cluster_dump_dict'])

    weibos = {}
    for cluster_id, contents in symbol_weibos.items():
        # Titles (first 7 characters of the content) already taken from this
        # cluster; used to skip near-duplicate weibos.
        seen_titles = set()
        for item in contents:
            ts = full_datetime2ts(item['datetime'])
            title = item['content'][:7]
            if start_ts <= ts <= end_ts and title not in seen_titles:
                label = ','.join(features[cluster_id][:5])
                weibos.setdefault(label, []).append(item)
                seen_titles.add(title)
                # At most three weibos are kept per cluster.
                if len(seen_titles) == 3:
                    break
    return weibos
Пример #3
0
def get_weibo_content(topic, start_ts, end_ts, opinion, sort_item='timestamp'):
    """Return the weibos of one sub-opinion of ``topic`` (weibo content).

    Searches ``subopinion_index_name`` for a document whose ``keys`` matches
    the wildcard ``opinion``, whose ``name`` equals ``topic``, whose
    ``start_ts`` is <= ``start_ts`` and whose ``end_ts`` is >= ``end_ts``.
    Only the first hit is used.

    Args:
        topic: Value for the ``name`` term filter.
        start_ts: Upper bound for the stored ``start_ts``.
        end_ts: Lower bound for the stored ``end_ts``.
        opinion: Wildcard pattern matched against ``keys``.
        sort_item: Key of the per-weibo dict to sort by (descending).

    Returns:
        A list of ``(mid, weibo_content)`` tuples sorted by ``sort_item`` in
        descending order, or the string ``'no results'`` when the search
        returns no hits.
    """
    # Debug output kept from the original; each line is formatted into a
    # single string so print() prints the same text under Python 2 and 3.
    print('opinion::::::::: %s' % (opinion,))
    print('topic:::::::;::: %s' % (topic,))
    print('%s %s' % (type(start_ts), type(end_ts)))
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'wildcard': {'keys': opinion}},
                    {'term': {'name': topic}},
                    {'range': {'start_ts': {'lte': start_ts}}},
                    {'range': {'end_ts': {'gte': end_ts}}},
                ]
            }
        }
    }
    print(query_body)
    hits = weibo_es.search(index=subopinion_index_name,
                           doc_type=subopinion_index_type,
                           body=query_body)['hits']['hits']
    print(len(hits))
    if not hits:
        return 'no results'

    weibo_dict = {}
    clusters = json.loads(hits[0]['_source']['cluster_dump_dict'])
    for cluster in clusters.values():
        if not cluster:
            # Nothing to show for an empty cluster; indexing [0] would raise
            # IndexError.
            continue
        # Only the first weibo of each cluster is used.
        first = cluster[0]
        weibo_content = {
            'text': first['text'],
            'uid': first['uid'],
            'timestamp': full_datetime2ts(first['datetime']),
            'comment': first['comment'],
            'retweeted': first['retweeted'],
            'mid': first['id'],
        }
        # Best-effort profile lookup: any failure falls back to 'unknown',
        # but KeyboardInterrupt/SystemExit still propagate.
        try:
            user = es_user_profile.get(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       id=weibo_content['uid'])['_source']
            uname, photo_url = user['nick_name'], user['photo_url']
        except Exception:
            uname = photo_url = 'unknown'
        weibo_content['uname'] = uname
        weibo_content['photo_url'] = photo_url
        weibo_dict[weibo_content['mid']] = weibo_content

    return sorted(weibo_dict.items(),
                  key=lambda item: item[1][sort_item],
                  reverse=True)
Пример #4
0
def get_symbol_weibo(topic, start_ts, end_ts, unit=MinInterval):
    """Collect up to three representative weibos per cluster (fishbone chart).

    Searches ``topics_river_index_name`` for a document whose ``name``
    phrase-matches ``topic`` and reads its JSON-encoded ``features`` and
    ``cluster_dump_dict`` fields from the first hit.

    Args:
        topic: Topic name to search for.
        start_ts: Lower bound (inclusive) of the accepted weibo timestamp.
        end_ts: Upper bound (inclusive) of the accepted weibo timestamp.
        unit: Currently unused.  The original author's note says the window
            should eventually start at ``end_ts - unit`` instead of
            ``start_ts``; the full range is used because the most recent
            15 minutes had no data.

    Returns:
        A dict mapping the comma-joined first five features of a cluster to
        a list of its selected weibos, or ``0`` when the search fails or
        yields no hit (historical return value, kept for callers).
    """
    query_body = {"query": {"match_phrase": {"name": topic}}}

    try:
        first_hit = weibo_es.search(index=topics_river_index_name,
                                    doc_type=topics_river_index_type,
                                    body=query_body)['hits']['hits'][0]
        symbol = first_hit['_source']
    except Exception:
        # Search error or empty hit list.
        return 0

    features = json.loads(symbol['features'])
    symbol_weibos = json.loads(symbol['cluster_dump_dict'])

    weibos = {}
    for cluster_id, contents in symbol_weibos.items():
        # Titles (first 7 characters of the content) already taken from this
        # cluster; used to skip near-duplicate weibos.
        taken_titles = set()
        for entry in contents:
            ts = full_datetime2ts(entry['datetime'])
            title = entry['content'][:7]
            if title in taken_titles or not (start_ts <= ts <= end_ts):
                continue
            label = ','.join(features[cluster_id][:5])
            weibos.setdefault(label, []).append(entry)
            taken_titles.add(title)
            # At most three weibos are kept per cluster.
            if len(taken_titles) == 3:
                break
    return weibos
Пример #5
0
def get_weibo_content(topic,
                      start_ts,
                      end_ts,
                      opinion,
                      sort_item='timestamp'):
    """Return the weibos of one sub-opinion of ``topic`` (weibo content).

    Searches ``subopinion_index_name`` for a document whose ``keys`` matches
    the wildcard ``opinion``, whose ``name`` equals ``topic``, whose
    ``start_ts`` is <= ``start_ts`` and whose ``end_ts`` is >= ``end_ts``.
    Only the first hit is used.

    Args:
        topic: Value for the ``name`` term filter.
        start_ts: Upper bound for the stored ``start_ts``.
        end_ts: Lower bound for the stored ``end_ts``.
        opinion: Wildcard pattern matched against ``keys``.
        sort_item: Key of the per-weibo dict to sort by (descending).

    Returns:
        A list of ``(mid, weibo_content)`` tuples sorted by ``sort_item`` in
        descending order, or the string ``'no results'`` when the search
        returns no hits.
    """
    # Debug output kept from the original; each line is formatted into a
    # single string so print() prints the same text under Python 2 and 3.
    print('opinion::::::::: %s' % (opinion,))
    print('topic:::::::;::: %s' % (topic,))
    print('%s %s' % (type(start_ts), type(end_ts)))
    must_clauses = [
        {'wildcard': {'keys': opinion}},
        {'term': {'name': topic}},
        {'range': {'start_ts': {'lte': start_ts}}},
        {'range': {'end_ts': {'gte': end_ts}}},
    ]
    query_body = {'query': {'bool': {'must': must_clauses}}}
    print(query_body)
    hits = weibo_es.search(index=subopinion_index_name,
                           doc_type=subopinion_index_type,
                           body=query_body)['hits']['hits']
    print(len(hits))
    if not hits:
        return 'no results'

    weibo_dict = {}
    clusters = json.loads(hits[0]['_source']['cluster_dump_dict'])
    for cluster in clusters.values():
        if not cluster:
            # Nothing to show for an empty cluster; indexing [0] would raise
            # IndexError.
            continue
        # Only the first weibo of each cluster is used.
        head = cluster[0]
        weibo_content = {
            'text': head['text'],
            'uid': head['uid'],
            'timestamp': full_datetime2ts(head['datetime']),
            'comment': head['comment'],
            'retweeted': head['retweeted'],
            'mid': head['id'],
        }
        # Best-effort profile lookup: any failure falls back to 'unknown',
        # but KeyboardInterrupt/SystemExit still propagate.
        try:
            user = es_user_profile.get(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       id=weibo_content['uid'])['_source']
            uname, photo_url = user['nick_name'], user['photo_url']
        except Exception:
            uname = photo_url = 'unknown'
        weibo_content['uname'] = uname
        weibo_content['photo_url'] = photo_url
        weibo_dict[weibo_content['mid']] = weibo_content

    return sorted(weibo_dict.items(),
                  key=lambda pair: pair[1][sort_item],
                  reverse=True)
Пример #6
0
def get_subopinion_new(topic, start_ts, end_ts, sort_item='timestamp'):
    """Return the sub-opinion entries stored for ``topic`` with their weibos.

    The document whose id is ``topic`` is fetched from
    ``subopinion_index_name`` (doc_type ``"text"``).  Every field of its
    ``_source`` except ``start_ts`` and ``end_ts`` is treated as one
    sub-opinion entry carrying a ``keys`` string and a JSON-encoded
    ``cluster_dump_dict``.

    Args:
        topic: Id of the document to fetch.
        start_ts: Not used by this function; kept for caller compatibility.
        end_ts: Not used by this function; kept for caller compatibility.
        sort_item: Key of the per-weibo dict to sort by (descending).

    Returns:
        A list with one dict per sub-opinion entry::

            {'keys': get_mode(<keys split on '_'>),
             'weibos': [(mid, weibo_content), ...]}

        where ``weibo_content`` has the keys ``text``, ``uid``,
        ``timestamp``, ``comment``, ``retweeted``, ``mid``, ``uname`` and
        ``photo_url``.
    """
    results = []
    # Debug output kept from the original; single-argument print() behaves
    # the same under Python 2 and Python 3.
    print(topic)
    source = weibo_es.get(index=subopinion_index_name,
                          id=topic,
                          doc_type="text")['_source']
    print(source)

    for field, subopinion in source.items():
        # start_ts / end_ts are skipped: they are not sub-opinion entries.
        if field in ('start_ts', 'end_ts'):
            continue

        result = {'keys': get_mode(subopinion['keys'].split('_'))}
        weibo_dict = {}

        clusters = json.loads(subopinion['cluster_dump_dict'])
        for cluster in clusters.values():
            if not cluster:
                # Nothing to show for an empty cluster; indexing [0] would
                # raise IndexError.
                continue
            # Only the first weibo of each cluster is used.
            head = cluster[0]
            weibo_content = {
                'text': head['text'],
                'uid': head['uid'],
                'timestamp': full_datetime2ts(head['datetime']),
                'comment': head['comment'],
                'retweeted': head['retweeted'],
                'mid': head['id'],
            }
            # Best-effort profile lookup: any failure falls back to
            # 'unknown', but KeyboardInterrupt/SystemExit still propagate.
            try:
                user = es_user_profile.get(
                    index=profile_index_name,
                    doc_type=profile_index_type,
                    id=weibo_content['uid'])['_source']
                uname, photo_url = user['nick_name'], user['photo_url']
            except Exception:
                uname = photo_url = 'unknown'
            weibo_content['uname'] = uname
            weibo_content['photo_url'] = photo_url
            weibo_dict[weibo_content['mid']] = weibo_content

        result['weibos'] = sorted(weibo_dict.items(),
                                  key=lambda pair: pair[1][sort_item],
                                  reverse=True)
        results.append(result)

    return results