Exemplo n.º 1
0
def search_topics(keyword):
    """Return the topics whose name contains *keyword*, as a JSON string.

    A topic matches when ``keyword`` occurs anywhere in its ``en_name`` or
    in its ``name`` field (wildcard pattern ``*keyword*``).  The result is
    a JSON array holding the ``_source`` of every hit the search returned.
    """
    pattern = '*' + keyword + '*'
    should_clauses = [
        {'wildcard': {'en_name': pattern}},
        {'wildcard': {'name': pattern}},
    ]
    query_body = {'query': {'bool': {'should': should_clauses}}}
    print(query_body)
    response = weibo_es.search(index=topic_index_name,
                               doc_type=topic_index_type,
                               body=query_body)
    if not response:
        return json.dumps([])
    return json.dumps([hit['_source'] for hit in response['hits']['hits']])
Exemplo n.º 2
0
def get_during_keywords(topic, start_ts, end_ts):  # keyword cloud
    """Return the heaviest keywords of a topic inside a time window.

    Searches index ``topic`` for documents whose ``timestamp`` lies in
    ``[start_ts, end_ts)`` (at most ``MAX_LANGUAGE_WEIBO`` hits), sums the
    weights of each document's ``keywords_dict`` keyword by keyword and
    keeps the ``MAX_FREQUENT_WORDS`` largest totals.

    Args:
        topic: name of the index to search.
        start_ts: inclusive lower bound of ``timestamp``.
        end_ts: exclusive upper bound of ``timestamp``.

    Returns:
        A JSON string encoding a list of ``[keyword, total_weight]`` pairs
        sorted by descending total weight.
    """
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'range': {
                        'timestamp': {'gte': start_ts, 'lt': end_ts}
                    }
                }
            }
        },
        'size': MAX_LANGUAGE_WEIBO
    }
    hits = weibo_es.search(index=topic,
                           doc_type=weibo_index_type,
                           body=query_body)['hits']['hits']
    totals = {}
    for hit in hits:
        # ``keywords_dict`` is stored as a JSON-encoded object.
        weights = json.loads(hit['_source']['keywords_dict'])
        for word, weight in weights.items():
            try:
                totals[word] += weight
            except KeyError:
                # First time this keyword is seen.
                totals[word] = weight
    word_results = sorted(totals.items(),
                          key=lambda pair: pair[1],
                          reverse=True)[:MAX_FREQUENT_WORDS]
    return json.dumps(word_results)
Exemplo n.º 3
0
def get_subopinion(topic, start_ts, end_ts):
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'term': {
                                'name': topic
                            }
                        }, {
                            'term': {
                                'start_ts': start_ts
                            }
                        }, {
                            'term': {
                                'end_ts': end_ts
                            }
                        }]
                    }
                }
            }
        }
    }
    print '6666666666', topic
    features = weibo_es.search(index=subopinion_index_name,
                               doc_type=subopinion_index_type,
                               body=query_body)['hits']['hits']
    if features:
        feature = json.loads(features[0]['_source']['features'])
        return feature.values()
    else:
        return 'no results'
Exemplo n.º 4
0
def _group_topic_hits(hits):
    """Group hits by ``en_name`` into ``[name, start_ts, end_ts, comput_status]`` rows."""
    grouped = {}
    for hit in hits:
        source = hit['_source']
        row = [source['name'], source['start_ts'], source['end_ts'],
               source['comput_status']]
        grouped.setdefault(source['en_name'], []).append(row)
    return grouped


def get_topics(user):
    """Return the topics recommended to *user* and the ones *user* submitted.

    Two searches are run against the topic index, each limited to 1000 hits:

    * ``recommend``: topics with ``comput_status`` equal to 1 that were not
      submitted by ``user``, sorted by ``submit_ts`` descending;
    * ``own``: topics whose ``submit_user`` is ``user``.

    Returns:
        A JSON string of the form ``{"recommend": {...}, "own": {...}}``
        where each inner object maps an ``en_name`` to a list of
        ``[name, start_ts, end_ts, comput_status]`` rows.
    """
    recommend_query = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{'term': {'comput_status': 1}}],
                        'must_not': [{'term': {'submit_user': user}}]
                    }
                }
            }
        },
        'sort': {'submit_ts': {'order': 'desc'}},
        'size': 1000
    }
    own_query = {
        'query': {
            'filtered': {
                'filter': {
                    'term': {'submit_user': user}
                }
            }
        },
        'size': 1000
    }

    results = {'recommend': {}, 'own': {}}
    for bucket, query in (('recommend', recommend_query), ('own', own_query)):
        response = weibo_es.search(index=topic_index_name,
                                   doc_type=topic_index_type,
                                   body=query)
        if response:
            results[bucket] = _group_topic_hits(response['hits']['hits'])
    return json.dumps(results)
Exemplo n.º 5
0
def get_subopinion(topic):
    """Return the sub-opinion feature values stored for *topic*.

    Takes the first document of the sub-opinion index whose ``name`` equals
    ``topic``, decodes its JSON ``features`` field and returns the values of
    that mapping.  Returns the string ``'no results'`` when nothing matches.
    """
    name_filter = {'term': {'name': topic}}
    query_body = {'query': {'filtered': {'filter': name_filter}}}
    response = weibo_es.search(index=subopinion_index_name,
                               doc_type=subopinion_index_type,
                               body=query_body)
    hits = response['hits']['hits']
    if not hits:
        return 'no results'
    first_features = json.loads(hits[0]['_source']['features'])
    return first_features.values()
Exemplo n.º 6
0
def get_weibo_by_time(topic, start_ts, end_ts, sort_item='timestamp'):
    """Fetch up to 200 weibos of a topic whose timestamp lies in a window.

    Args:
        topic: name of the index to search.
        start_ts: inclusive lower bound of ``timestamp`` (passed to ``int``).
        end_ts: inclusive upper bound of ``timestamp`` (passed to ``int``).
        sort_item: field the search itself is sorted on, descending.

    Returns:
        A list of ``(mid, weibo_content)`` pairs ordered by ascending
        ``retweeted`` count; an empty list when nothing matches.  Each
        ``weibo_content`` dict carries ``text``, ``uid``, ``timestamp``,
        ``comment``, ``retweeted``, ``uname``, ``photo_url`` and ``mid``.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'range': {'timestamp': {'lte': int(end_ts),
                                             'gte': int(start_ts)}}}
                ]
            }
        },
        'size': 200,
        'sort': {sort_item: {'order': 'desc'}}
    }
    hits = weibo_es.search(index=topic, body=query_body)['hits']['hits']

    weibo_dict = {}
    for hit in hits:
        weibo = hit['_source']
        weibo_content = {
            'text': weibo['text'],
            'uid': weibo['uid'],
            'timestamp': weibo['timestamp'],
            # Interaction counters may be absent; a missing one counts as 0.
            'comment': weibo.get('comment', 0),
            'retweeted': weibo.get('retweeted', 0),
            'mid': weibo['mid'],
        }
        try:
            user = es_user_profile.get(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       id=weibo_content['uid'])['_source']
            weibo_content['uname'] = user['nick_name']
            weibo_content['photo_url'] = user['photo_url']
        except Exception:
            # Best effort: any failure to fetch or read the profile falls
            # back to placeholders instead of dropping the weibo.
            weibo_content['uname'] = 'unknown'
            weibo_content['photo_url'] = 'unknown'
        weibo_dict[weibo_content['mid']] = weibo_content

    # Keyed by mid above, so a mid that appears twice is kept once.
    return sorted(weibo_dict.items(), key=lambda pair: pair[1]['retweeted'])
Exemplo n.º 7
0
def get_topics_river(topic, start_ts, end_ts, unit=MinInterval):  # topic river
    """Build the per-cluster time series for a topic.

    Looks up the first document of the topics-river index whose ``name`` is
    ``topic`` and whose stored window covers ``[start_ts, end_ts]``
    (``start_ts`` field <= ``start_ts`` and ``end_ts`` field >= ``end_ts``),
    decodes its JSON ``features`` mapping (cluster id -> keyword list) and
    asks ``cul_key_weibo_time_count`` for the counts of every cluster.

    Args:
        topic: topic name; also passed on as the index to count in.
        start_ts: start of the requested window.
        end_ts: end of the requested window.
        unit: forwarded to ``cul_key_weibo_time_count`` as its last argument.

    Returns:
        A dict mapping the first keyword of each non-empty cluster to that
        cluster's entry in the ``cul_key_weibo_time_count`` result; an empty
        dict when no document matches.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'name': topic}},
                    {'range': {'start_ts': {'lte': start_ts}}},
                    {'range': {'end_ts': {'gte': end_ts}}},
                ]
            }
        }
    }
    # Query once and reuse the hits.
    hits = weibo_es.search(index=topics_river_index_name,
                           doc_type=topics_river_index_type,
                           body=query_body)['hits']['hits']
    if not hits:
        return {}

    news_topics = json.loads(hits[0]['_source']['features'])
    zhutihe_results = cul_key_weibo_time_count(topic, news_topics, start_ts,
                                               end_ts, unit)
    results = {}
    for clusterid, keywords in news_topics.items():
        if len(keywords) > 0:
            results[keywords[0]] = zhutihe_results[clusterid]
    return results
Exemplo n.º 8
0
def get_symbol_weibo(topic, start_ts, end_ts, unit=MinInterval):  # fishbone chart
    """Pick up to three distinctly titled weibos per cluster of a topic.

    Reads the first topics-river document whose ``name`` is ``topic`` and
    whose stored window covers ``[start_ts, end_ts]``.  For every cluster in
    its ``cluster_dump_dict`` the weibos are scanned in stored order; one is
    kept when its timestamp lies in ``[start_ts, end_ts]`` and its title (the
    ``【...】`` part of its content) has not been kept yet for that cluster.
    Scanning a cluster stops after three weibos have been kept.

    Args:
        topic: topic name to look up.
        start_ts: inclusive lower bound of the weibo timestamp.
        end_ts: inclusive upper bound of the weibo timestamp.
        unit: accepted for interface compatibility; currently unused.

    Returns:
        A dict mapping the first feature word of a cluster to the list of
        weibos kept for it.  Empty when no document matches.  Weibos without
        a ``【...】`` title are skipped.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'name': topic}},
                    {'range': {'start_ts': {'lte': start_ts}}},
                    {'range': {'end_ts': {'gte': end_ts}}},
                ]
            }
        }
    }
    hits = weibo_es.search(index=topics_river_index_name,
                           doc_type=topics_river_index_type,
                           body=query_body)['hits']['hits']
    if not hits:
        return {}

    symbol = hits[0]['_source']
    features = json.loads(symbol['features'])
    symbol_weibos = json.loads(symbol['cluster_dump_dict'])
    title_pattern = re.compile(r'【.*】')

    weibos = {}
    for clusterid, contents in symbol_weibos.items():
        seen_titles = set()
        for item in contents:
            ts = full_datetime2ts(item['datetime'])
            match = title_pattern.search(item['content'].encode('utf8'))
            if match is None:
                # No bracketed title to deduplicate on.
                continue
            title = match.group(0)
            # NOTE: the lower bound was meant to be ``end_ts - unit``, but the
            # most recent 15 minutes held no data, so the whole window is used.
            if start_ts <= ts <= end_ts and title not in seen_titles:
                weibos.setdefault(features[clusterid][0], []).append(item)
                seen_titles.add(title)
                if len(seen_titles) == 3:
                    break
    return weibos
Exemplo n.º 9
0
def search_topic_by_topic(topic):
    """Return, as a JSON array string, the topic documents named *topic*.

    Runs a ``term`` query on the ``name`` field of the topic index and
    serialises the ``_source`` of every returned hit.
    """
    response = weibo_es.search(index=topic_index_name,
                               doc_type=topic_index_type,
                               body={'query': {'term': {'name': topic}}})
    sources = []
    if response:
        sources = [hit['_source'] for hit in response['hits']['hits']]
    return json.dumps(sources)
Exemplo n.º 10
0
def cul_key_weibo_time_count(topic, news_topics, start_ts, over_ts, during):
    """Count, per cluster and per time bucket, the weibos matching its keywords.

    Args:
        topic: name of the index to search.
        news_topics: mapping of cluster id to a list of keywords, e.g.
            ``{u'd2e97cf7-...': [u'...', u'...', u'...']}``.  Clusters with
            an empty keyword list are skipped.
        start_ts: start of the window (passed to ``int``).
        over_ts: end of the window (passed to ``int``, then aligned with
            ``ts2HourlyTime``).
        during: ignored; the bucket width is always ``Day``.

    Returns:
        A dict mapping each cluster id to a list of
        ``(ts2datetime(bucket_end), count)`` pairs sorted by the label, where
        ``count`` is ``hits.total`` of a search for documents whose
        ``timestamp`` lies in the bucket and whose ``keywords_string``
        contains at least one of the cluster's keywords.
    """
    # NOTE(review): the caller-supplied bucket width is overridden here.
    # Kept as is because callers may rely on daily buckets; confirm.
    during = Day

    # The window and its buckets do not depend on the cluster, so they are
    # computed once; every cluster is counted over identical buckets.
    start_ts = int(start_ts)
    over_ts = ts2HourlyTime(int(over_ts), during)
    interval = (over_ts - start_ts) // during
    buckets = []
    for i in range(interval, 0, -1):
        begin_ts = over_ts - during * i
        buckets.append((begin_ts, begin_ts + during))

    key_weibo_time_count = {}
    for clusterid, keywords in news_topics.items():
        if len(keywords) == 0:
            continue
        keyword_clause = {
            'bool': {
                'should': [
                    {'wildcard': {'keywords_string': '*' + word + '*'}}
                    for word in keywords
                ]
            }
        }
        # Fresh per cluster so counts never leak from one cluster to the next.
        time_dict = {}
        for begin_ts, end_ts in buckets:
            query_body = {
                'query': {
                    'bool': {
                        'must': [
                            {'range': {'timestamp': {'gte': begin_ts,
                                                     'lt': end_ts}}},
                            keyword_clause,
                        ]
                    }
                }
            }
            key_weibo = weibo_es.search(index=topic,
                                        doc_type=weibo_index_type,
                                        body=query_body)
            # Number of matching weibos in this bucket.
            time_dict[ts2datetime(end_ts)] = key_weibo['hits']['total']
        key_weibo_time_count[clusterid] = sorted(time_dict.items(),
                                                 key=lambda pair: pair[0])
    return key_weibo_time_count
Exemplo n.º 11
0
def get_weibo_content(topic, start_ts, end_ts, opinion, sort_item='timestamp'):  # weibo content
    """Return one representative weibo per cluster of a sub-opinion.

    Reads the first sub-opinion document whose ``keys`` field matches the
    wildcard pattern ``opinion``, whose ``name`` is ``topic`` and whose
    stored window covers ``[start_ts, end_ts]``.  The first weibo of every
    cluster in its JSON ``cluster_dump_dict`` is returned, enriched with the
    author's nickname and photo URL.

    Args:
        topic: topic name.
        start_ts: start of the requested window.
        end_ts: end of the requested window.
        opinion: wildcard pattern matched against the ``keys`` field.
        sort_item: key of the returned content dict to sort on, descending.

    Returns:
        A list of ``(mid, weibo_content)`` pairs sorted by ``sort_item``
        descending, or the string ``'no results'`` when no document matches.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'wildcard': {'keys': opinion}},
                    {'term': {'name': topic}},
                    {'range': {'start_ts': {'lte': start_ts}}},
                    {'range': {'end_ts': {'gte': end_ts}}}
                ]
            }
        }
    }
    hits = weibo_es.search(index=subopinion_index_name,
                           doc_type=subopinion_index_type,
                           body=query_body)['hits']['hits']
    if not hits:
        return 'no results'

    clusters = json.loads(hits[0]['_source']['cluster_dump_dict'])
    weibo_dict = {}
    for cluster in clusters.values():
        if not cluster:
            # An empty cluster has no representative weibo.
            continue
        weibo = cluster[0]
        weibo_content = {
            'text': weibo['text'],
            'uid': weibo['uid'],
            'timestamp': full_datetime2ts(weibo['datetime']),
            'comment': weibo['comment'],
            'retweeted': weibo['retweeted'],
            'mid': weibo['id'],
        }
        try:
            user = es_user_profile.get(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       id=weibo_content['uid'])['_source']
            weibo_content['uname'] = user['nick_name']
            weibo_content['photo_url'] = user['photo_url']
        except Exception:
            # Best effort: any failure to fetch or read the profile falls
            # back to placeholders instead of dropping the weibo.
            weibo_content['uname'] = 'unknown'
            weibo_content['photo_url'] = 'unknown'
        weibo_dict[weibo_content['mid']] = weibo_content

    return sorted(weibo_dict.items(),
                  key=lambda pair: pair[1][sort_item],
                  reverse=True)
Exemplo n.º 12
0
def get_key_topics(keyword):
    """Return the computed topics whose name contains *keyword*.

    Searches the topic index for documents with ``comput_status`` equal to 1
    whose ``name`` matches the wildcard pattern ``*keyword*``.

    Returns:
        A JSON string encoding an object that maps each ``en_name`` to a
        list of ``[name, start_ts, end_ts, comput_status]`` rows.
    """
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'comput_status': 1}},
                    {'wildcard': {'name': '*' + keyword + '*'}}
                ]
            }
        }
    }
    response = weibo_es.search(index=topic_index_name,
                               doc_type=topic_index_type,
                               body=query_body)
    grouped = {}
    if response:
        for hit in response['hits']['hits']:
            source = hit['_source']
            row = [source['name'], source['start_ts'], source['end_ts'],
                   source['comput_status']]
            grouped.setdefault(source['en_name'], []).append(row)
    return json.dumps(grouped)
Exemplo n.º 13
0
def submit(topic, start_ts, end_ts, submit_user):
    """Register a topic task in the topic index and push it onto the queue.

    The task's ``en_name`` is reused from the first existing topic document
    with the same ``name``; when there is none it is built from
    ``p.get_pinyin(topic)`` plus the current Unix time.  The document id is
    ``<start_ts>_<end_ts>_<en_name>_<submit_user>``; the task is indexed only
    when no document with that id can be fetched.

    Args:
        topic: topic name.
        start_ts: window start; must be a string (concatenated into the id).
        end_ts: window end; must be a string (concatenated into the id).
        submit_user: submitting user; must be a string.

    Returns:
        ``'already_have'`` when a document with the same id was fetched,
        otherwise ``'success'``.
    """
    name_query = {
        'query': {
            'filtered': {
                'filter': {
                    'term': {'name': topic}
                }
            }
        }
    }
    find_topic = weibo_es.search(index=topic_index_name,
                                 doc_type=topic_index_type,
                                 body=name_query)['hits']['hits']
    if find_topic:
        en_name = find_topic[0]['_source']['en_name']
    else:
        en_name = p.get_pinyin(topic) + '-' + str(int(time.time()))

    submit_id = start_ts + '_' + end_ts + '_' + en_name + '_' + submit_user
    task = {
        'name': topic,
        'en_name': en_name,
        'start_ts': start_ts,
        'end_ts': end_ts,
        'submit_user': submit_user,
        'comput_status': 0,
        'submit_ts': int(time.time())
    }
    try:
        weibo_es.get(index=topic_index_name,
                     doc_type=topic_index_type,
                     id=submit_id)
    except Exception:
        # A failed fetch is treated as "not submitted yet" (presumably the
        # client raises for a missing document; confirm against the
        # installed client version).
        weibo_es.index(index=topic_index_name,
                       doc_type=topic_index_type,
                       id=submit_id,
                       body=task)
        result = 'success'
    else:
        result = 'already_have'

    # NOTE(review): the task is queued even when it already exists; confirm
    # that re-queuing is intended.
    # TODO: push to redis, then change the status; change it back once the
    # computation has finished.
    r.lpush(topic_queue_name, json.dumps(task))
    return result