示例#1
0
def get_during_keywords(topic, start_ts, end_ts):  # keyword cloud, unit=MinInterval
    """Return the top-ranked keywords of a topic's weibo posts as JSON.

    Searches the Elasticsearch index named ``topic`` for documents whose
    ``timestamp`` falls in ``[start_ts, end_ts)``, feeds their UTF-8 encoded
    ``text`` fields to ``get_weibo`` and ranks the mapping it returns.

    Args:
        topic: Name of the Elasticsearch index to search.
        start_ts: Lower timestamp bound, inclusive (``gte``).
        end_ts: Upper timestamp bound, exclusive (``lt``).

    Returns:
        A JSON string encoding a list of ``[key, value]`` pairs sorted by
        value in descending order and truncated to ``MAX_FREQUENT_WORDS``
        entries. Presumably the pairs are (keyword, weight/frequency) --
        confirm against ``get_weibo``.
    """
    # NOTE(review): the `filtered` query only exists in older Elasticsearch
    # releases (deprecated in 2.0, removed in 5.0); this body assumes such a
    # cluster -- confirm before upgrading the server.
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'range': {
                        'timestamp': {
                            'gte': start_ts,
                            'lt': end_ts,
                        }
                    }
                }
            }
        },
        # At most MAX_LANGUAGE_WEIBO documents are fetched for the analysis.
        'size': MAX_LANGUAGE_WEIBO,
    }
    hits = weibo_es.search(index=topic,
                           doc_type=weibo_index_type,
                           body=query_body)['hits']['hits']
    weibo_text = [hit['_source']['text'].encode('utf-8') for hit in hits]
    keywords_dict = get_weibo(weibo_text, n_gram=2, n_count=100)
    # `.items()` instead of the Python-2-only `.iteritems()`: sorted() copies
    # the pairs anyway, so the result is identical and works on Python 3 too.
    word_results = sorted(keywords_dict.items(),
                          key=lambda pair: pair[1],
                          reverse=True)[:MAX_FREQUENT_WORDS]
    return json.dumps(word_results)
示例#2
0
文件: utils.py 项目: SwoJa/ruman
def get_during_keywords(topic, start_ts, end_ts):  # keyword cloud, unit=MinInterval
    """Build the keyword ranking for one topic over a time window.

    Queries the Elasticsearch index ``topic`` for documents with
    ``start_ts <= timestamp < end_ts``, passes the UTF-8 encoded ``text`` of
    every hit to ``get_weibo`` and serialises the highest-valued entries of
    the returned mapping.

    Args:
        topic: Elasticsearch index name to query.
        start_ts: Inclusive lower bound for the ``timestamp`` field.
        end_ts: Exclusive upper bound for the ``timestamp`` field.

    Returns:
        JSON text for a list of ``[key, value]`` pairs, ordered by value from
        highest to lowest and limited to ``MAX_FREQUENT_WORDS`` items.
        Presumably keys are keywords and values their weights -- verify
        against ``get_weibo``.
    """
    # NOTE(review): `filtered` queries were removed in Elasticsearch 5.0, so
    # this request presumably targets an older cluster -- confirm.
    time_range = {'timestamp': {'gte': start_ts, 'lt': end_ts}}
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'range': time_range
                }
            }
        },
        # Upper limit on the number of documents returned by the search.
        'size': MAX_LANGUAGE_WEIBO
    }
    response = weibo_es.search(index=topic, doc_type=weibo_index_type, body=query_body)
    weibo_text = [doc['_source']['text'].encode('utf-8')
                  for doc in response['hits']['hits']]
    keywords_dict = get_weibo(weibo_text, n_gram=2, n_count=100)
    # Use `.items()` rather than the Python-2-only `.iteritems()`; sorted()
    # materialises the pairs either way, so the ranking is unchanged.
    ranked = sorted(keywords_dict.items(), key=lambda kv: kv[1], reverse=True)
    word_results = ranked[:MAX_FREQUENT_WORDS]
    return json.dumps(word_results)