示例#1
0
def get_topic_weibo(topic,en_name,start_ts,end_ts,keywords,mid):
    """Probe the ``en_name`` index, then run ``find_flow_texts_scan`` for the topic.

    A one-document ``match_all`` search sorted by ``timestamp`` is issued
    against ``en_name`` purely as a probe; the hits are discarded. If the probe
    raises, ``get_mappings(en_name)`` is called before scanning (presumably to
    create the index and its mapping -- TODO confirm against ``get_mappings``).

    Args:
        topic: passed through to ``find_flow_texts_scan``.
        en_name: index name used for the probe, for ``get_mappings`` and for
            the scan.
        start_ts: passed through to ``find_flow_texts_scan``.
        end_ts: passed through to ``find_flow_texts_scan``.
        keywords: passed through to ``find_flow_texts_scan``.
        mid: passed through to ``find_flow_texts_scan``.

    Returns:
        None.
    """
    probe_query = {'query':{'match_all':{}},'sort':'timestamp','size':1}
    try:
        # The result is intentionally discarded. The ['hits']['hits'] lookup is
        # kept so that a malformed response still takes the fallback path.
        es_event.search(index=en_name,doc_type=event_type,body=probe_query)['hits']['hits']
    except Exception:
        # Best-effort fallback on any ordinary failure of the probe.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate instead of being mistaken for a failed probe.
        get_mappings(en_name)
    find_flow_texts_scan(start_ts,end_ts,topic,en_name,keywords,mid)
示例#2
0
def get_topic_weibo(topic, en_name, start_ts, end_ts):
    """Probe the ``en_name`` index, then run ``find_flow_texts`` for the topic.

    A one-document ``match_all`` search sorted by ``timestamp`` is issued
    against ``en_name`` purely as a probe; the hits are discarded. If the probe
    raises, ``get_mappings(en_name)`` is called before the text search
    (presumably to create the index and its mapping -- TODO confirm against
    ``get_mappings``).

    Args:
        topic: passed through to ``find_flow_texts``.
        en_name: index name used for the probe, for ``get_mappings`` and for
            ``find_flow_texts``.
        start_ts: passed through to ``find_flow_texts``.
        end_ts: passed through to ``find_flow_texts``.

    Returns:
        None.
    """
    probe_query = {'query': {'match_all': {}}, 'sort': 'timestamp', 'size': 1}
    try:
        # The result is intentionally discarded. The ['hits']['hits'] lookup is
        # kept so that a malformed response still takes the fallback path.
        weibo_es.search(index=en_name,
                        doc_type=topic_index_type,
                        body=probe_query)['hits']['hits']
    except Exception:
        # Best-effort fallback on any ordinary failure of the probe.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate instead of being mistaken for a failed probe.
        get_mappings(en_name)
    find_flow_texts(start_ts, end_ts, topic, en_name)
def get_topic_tweets(task_id, task_source, event_keywords, create_time):
    """Ensure the ``task_id`` index is set up, then run ``find_flow_texts``.

    If ``es_intel`` reports that no index named ``task_id`` exists, one mapping
    helper is called according to ``task_source``:

    * ``'weibo'``    -> ``get_mappings(task_id, index_type='weibo')``
    * ``'facebook'`` -> ``facebook_flow_text_mappings(...)``; its return value
      is printed to stdout
    * anything else  -> ``twitter_flow_text_mappings(task_id, index_type='twitter')``

    Args:
        task_id: index name checked for existence and handed to the helpers.
        task_source: selects the mapping helper; also passed to
            ``find_flow_texts``.
        event_keywords: passed through to ``find_flow_texts``.
        create_time: unused in this function; kept so existing callers keep
            working.

    Returns:
        None.
    """
    index_exists = es_intel.indices.exists(index=task_id)
    if not index_exists:
        if task_source == 'weibo':
            get_mappings(task_id, index_type='weibo')
        elif task_source == 'facebook':
            # Single-argument parenthesised print behaves the same as the
            # Python 2 print statement and is also valid Python 3 syntax.
            print(facebook_flow_text_mappings(task_id, index_type='facebook'))
        else:
            # Every source other than weibo/facebook gets the twitter mapping.
            twitter_flow_text_mappings(task_id, index_type='twitter')

    find_flow_texts(task_source, task_id, event_keywords)
                else:
                    r_cluster.hset('sensitive_'+str(ts), str(uid), json.dumps(sensitive_words_dict))

            #identify whether to mapping new es
            weibo_timestamp = item['timestamp']
            should_index_name_date = ts2datetime(weibo_timestamp)
            if should_index_name_date != now_index_name_date:
                if action != [] and xdata != []:
                    index_name = index_name_pre + now_index_name_date
                    if bulk_action:
                        es.bulk(bulk_action, index=index_name, doc_type=index_type, timeout=60)
                    bulk_action = []
                    count = 0
                    now_index_name_date = should_index_name_date
                    index_name = index_name_pre + now_index_name_date
                    get_mappings(index_name)

            # save
            action, xdata = expand_index_action(item)
            bulk_action.extend([action, xdata])
            count += 1
        
        if count % 1000 == 0 and count != 0:
            index_name = index_name_pre + now_index_name_date
            if bulk_action:
                es.bulk(bulk_action, index=index_name, doc_type=index_type, timeout=60)
            bulk_action = []
            count = 0
            class_te = time.time()
            class_ts = class_te
示例#5
0
            #identify whether to mapping new es
            weibo_timestamp = item['timestamp']
            should_index_name_date = ts2datetime(weibo_timestamp)
            if should_index_name_date != now_index_name_date:
                if action != [] and xdata != []:
                    index_name = index_name_pre + now_index_name_date
                    if bulk_action:
                        es.bulk(bulk_action,
                                index=index_name,
                                doc_type=index_type,
                                timeout=60)
                    bulk_action = []
                    count = 0
                    now_index_name_date = should_index_name_date
                    index_name = index_name_pre + now_index_name_date
                    get_mappings(index_name)

            # save
            action, xdata = expand_index_action(item)
            bulk_action.extend([action, xdata])
            count += 1

        if count % 1000 == 0 and count != 0:
            index_name = index_name_pre + now_index_name_date
            if bulk_action:
                es.bulk(bulk_action,
                        index=index_name,
                        doc_type=index_type,
                        timeout=60)
            bulk_action = []
            count = 0