示例#1
0
def news_content(task_source, task_id, news_limit=NEWS_LIMIT):
    """Fetch news-like posts for a task and return them as a list of dicts.

    Searches index ``task_id`` / doc type ``task_source`` through
    ``es_intel`` for documents whose ``text`` matches either of two
    wildcard patterns (a bracket pattern, or a '#' pattern that depends on
    the source). For any source other than 'weibo' the request additionally
    carries ``'sort': 'share'``. At most ``news_limit`` hits are requested.

    Each returned dict has the keys 'news_id' (always 'news'),
    'content168', 'id', 'datetime', 'comment' and 'retweeted'.

    A hit whose ``_source`` lacks any field read below raises KeyError.
    """
    from_weibo = task_source == 'weibo'

    # The two sources differ only in the '#' wildcard and the optional sort.
    hashtag_pattern = '*#*#*' if from_weibo else '*#*'
    query_body = {
        'query': {
            'bool': {
                'should': [
                    {'wildcard': {'text': '*【*】*'}},
                    {'wildcard': {'text': hashtag_pattern}},
                ]
            }
        },
        'size': news_limit,
    }
    if not from_weibo:
        query_body['sort'] = 'share'

    # Field names holding the post id and the share count, per source.
    if from_weibo:
        id_field, share_field = 'mid', 'retweeted'
    elif task_source == 'facebook':
        id_field, share_field = 'fid', 'share'
    else:
        id_field, share_field = 'tid', 'share'

    response = es_intel.search(index=task_id,
                               doc_type=task_source,
                               body=query_body)
    hits = response['hits']['hits']

    def to_news_item(hit):
        # Lookups stay in the original order so a missing field raises the
        # same KeyError as before.
        source = hit['_source']
        content = source['text']
        source['uid']  # value is unused; the lookup is kept on purpose
        posted_at = source['timestamp']
        comment_count = source['comment']
        post_id = source[id_field]
        share_count = source[share_field]
        return {
            'news_id': 'news',
            'content168': content,
            'id': post_id,
            'datetime': ts2datetime_full(posted_at),
            'comment': comment_count,
            'retweeted': share_count,
        }

    return [to_news_item(hit) for hit in hits]
def get_models_text(task_id, task_source, opinion_keywords_list):

    if task_source == 'weibo':
        sort_item = 'retweeted'
    else:
        sort_item = 'share'

    query_body_pos = {
        'query': {
            'terms': {
                'sentiment': SENTIMENT_POS
            }
        },
        'sort': {
            sort_item: {
                'order': 'desc'
            }
        },
        'size': MAX_SEARCH_SIZE
    }

    query_body_neg = {
        'query': {
            'terms': {
                'sentiment': SENTIMENT_NEG
            }
        },
        'sort': {
            sort_item: {
                'order': 'desc'
            }
        },
        'size': MAX_SEARCH_SIZE
    }

    query_body_news = {
        'query': {
            'bool': {
                'must': [{
                    'wildcard': {
                        'text': '*【*】*'
                    }
                }]
            }
        },
        'sort': {
            sort_item: {
                'order': 'desc'
            }
        },
        'size': MAX_SEARCH_SIZE
    }

    results_pos = es_intel.search(index=task_id,
                                  doc_type=task_source,
                                  body=query_body_pos)['hits']['hits']
    results_neg = es_intel.search(index=task_id,
                                  doc_type=task_source,
                                  body=query_body_neg)['hits']['hits']
    results_news = es_intel.search(index=task_id,
                                   doc_type=task_source,
                                   body=query_body_news)['hits']['hits']

    text_list_pos = []
    text_list_neg = []
    text_list_news = []

    for result_pos in results_pos:
        text_list_pos.append(result_pos['_source']['text'])

    for result_neg in results_neg:
        text_list_neg.append(result_neg['_source']['text'])

    for result_news in results_news:
        text_list_news.append(result_news['_source']['text'])

    model_text_dict = {}

    model_text_pos = text_generation_main(text_list_pos, opinion_keywords_list)
    model_text_neg = text_generation_main(text_list_neg, opinion_keywords_list)
    model_text_news = text_generation_main(text_list_news,
                                           opinion_keywords_list)

    model_text_dict['model_text_pos'] = model_text_pos
    model_text_dict['model_text_neg'] = model_text_neg
    model_text_dict['model_text_news'] = model_text_news

    print 'model_text_dict..', model_text_dict

    save2models_text(task_id, model_text_dict)