def get_interval_count(topic, date, windowsize): results = [] ts_list = [] start_date = ts2datetime(datetime2ts(date) - windowsize * Day) unit = 900 print 'start_date:', start_date start_ts = datetime2ts(start_date) ts_list = [start_ts] end_ts = datetime2ts(date) interval = (end_ts - start_ts) / during print 'interval:', interval print topic if MYSQL_TOPIC_LEN == 0: topic0 = topic[:20] else: topic0 = topic for i in range(interval, 0, -1): #print 'i:', i begin_ts = end_ts - during * i over_ts = begin_ts + during #print 'begin_ts:', begin_ts#ts2date(begin_ts) #print 'over_ts:', over_ts#ts2date(over_ts) ts_list.append(over_ts) items = db.session.query(PropagateCount).filter(PropagateCount.topic==topic0 ,\ PropagateCount.end<=over_ts ,\ PropagateCount.end>begin_ts ,\ PropagateCount.range==unit).all() #).all() if items: result = len(items) else: result = 0 results.append(float(result)) print 'detect_peak_bottom_line::', results new_zeros = detect_peaks(results) # 返回峰值出现的时间区间的序号 new_bottom = detect_bottom(results) # get the first bottom print 'new_peaks:', new_zeros print 'new_bottom:', new_bottom # 存趋势时间范围 # save_peak_bottom(new_zeros, new_bottom) #trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list, topic_xapian_id) trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list) print 'trend_makers:', trend_maker trend_pusher = get_pushers(topic, new_zeros, new_bottom, ts_list) print 'trend_pushers:', trend_pusher save_trend_maker(topic, date, windowsize, trend_maker) save_trend_pusher(topic, date, windowsize, trend_pusher) return trend_maker, trend_pusher
def get_interval_count(topic, date, windowsize, topic_xapian_id): results = [0] ts_list = [] start_date = ts2datetime(datetime2ts(date) - windowsize * Day) #unit = 900 print 'start_date:', start_date start_ts = datetime2ts(start_date) ts_list = [start_ts] end_ts = datetime2ts(date) interval = (end_ts - start_ts) / during print 'interval:', interval for i in range(interval, 0, -1): #print 'i:', i begin_ts = end_ts - during * i over_ts = begin_ts + during #print 'begin_ts:', ts2date(begin_ts) #print 'over_ts:', ts2date(over_ts) ts_list.append(over_ts) items = db.session.query(PropagateCount).filter(PropagateCount.topic==topic ,\ PropagateCount.end<=over_ts ,\ PropagateCount.end>begin_ts ,\ PropagateCount.range==unit).all() if items: result = Merge_propagate(items) else: result = 0 results.append(float(result)) print 'detect_peak_bottom_line::', results new_zeros = detect_peaks(results) # 返回峰值出现的时间区间的序号 new_bottom = detect_bottom(results) # get the first bottom print 'new_peaks:', new_zeros print 'new_bottom:', new_bottom # 存趋势时间范围 # save_peak_bottom(new_zeros, new_bottom) trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list, topic_xapian_id) print 'trend_makers:', trend_maker trend_pusher = get_pushers(topic,new_zeros, new_bottom, ts_list, topic_xapian_id) print 'trend_pushers:', trend_pusher save_trend_maker(topic, date, windowsize, trend_maker, topic_xapian_id) save_trend_pusher(topic, date, windowsize, trend_pusher, topic_xapian_id) return trend_maker, trend_pusher
def trend_user(topic, start_ts, end_ts, news_collection, comment_collection): '''利用时间分析部分的计算结果 ''' ts_list, count_list = get_interval_count(topic, start_ts, end_ts) # 波峰 new_peaks = detect_peaks(count_list) print 'news_peaks:', new_peaks # 波谷 new_bottom = detect_bottom(count_list) print 'news_bottom:', new_bottom # trend_maker trend_maker = get_maker(topic, new_peaks, new_bottom, ts_list, news_collection) print 'len(trend_maker):', len(trend_maker) # trend_pusher trend_pusher = get_pusher(topic, new_peaks, new_bottom, ts_list, news_collection, comment_collection) print 'len(trend_pusher):', len(trend_pusher) save_trend_maker(topic, start_ts, end_ts, trend_maker) save_trend_pusher(topic, start_ts, end_ts, trend_pusher)
def get_interval_count(topic, date, windowsize): index_name = index_event_analysis_results index_type = type_event_analysis_results results = [] ts_list = [] start_date = ts2datetime(datetime2ts(date) - windowsize * Day) unit = 900 print 'start_date:', start_date start_ts = datetime2ts(start_date) ts_list = [start_ts] end_ts = datetime2ts(date) interval = (end_ts - start_ts) / during print 'interval:', interval print topic ''' if MYSQL_TOPIC_LEN == 0: topic0 = topic[:20] else: topic0=topic ''' for i in range(interval, 0, -1): #print 'i:', i begin_ts = long(end_ts) - during * i over_ts = begin_ts + during #print 'begin_ts:', begin_ts#ts2date(begin_ts) #print 'over_ts:', over_ts#ts2date(over_ts) ts_list.append(over_ts) ''' items = db.session.query(PropagateCount).filter(PropagateCount.topic==topic0 ,\ PropagateCount.end<=over_ts ,\ PropagateCount.end>begin_ts ,\ PropagateCount.range==unit).all() #).all() ''' ''' query_body = { 'query':{ 'bool':{ 'must':[ {'range':{'end_ts':{'gt':begin_ts,'lte':over_ts}}}, {'term':{'en_name':topic0}}, {'term':{'range':unit}} ] } }, 'size': 1000000 # 返回条数限制 待删 } items = weibo_es.search(index=index_name,doc_type=index_type,body=query_body)['hits']['hits'] ''' query_body = { 'query': { 'bool': { 'must': [{ 'term': { 'en_name': topic } }] } }, 'size': 1000000 } es_results = weibo_es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits'] #print 'results::::::::::',results print 'len_results:::::::::::', len(es_results) count = 0 for result in es_results: result = result['_source'] time_results = json.loads(result['time_results']) count_results = time_results['count'] print 'type_time_results:::::::', type(time_results) time_time = time_results.keys() print 'time_results.keys:::::', time_time.sort() #print 'time_results.keys:::::',len(time_time.sort()) if time_results['during'] == unit: print 'count_results.keys()::::;', count_results.keys() for end_ts_count in count_results.keys(): if end_ts_count > begin_ts and end_ts_count <= over_ts: count += 1 ''' if items: result = len(items) else: result = 0 results.append(float(result)) ''' ''' if count: result = count else: result = 0 ''' results.append(float(count)) print 'results::::::::::', results #print abababa print 'detect_peak_bottom_line::::::', results new_zeros = detect_peaks(results) # 返回峰值出现的时间区间的序号 new_bottom = detect_bottom(results) # get the first bottom print 'new_zeros:::::::::::::::::', new_zeros print 'new_bottom::::::::::::::::', new_bottom print 'ts_list:::::::::::::::::::', ts_list # 存趋势时间范围 # save_peak_bottom(new_zeros, new_bottom) #trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list, topic_xapian_id) trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list) print 'trend_makers:', trend_maker trend_pusher = get_pushers(topic, new_zeros, new_bottom, ts_list) print 'trend_pushers:', trend_pusher #save_trend_maker(topic, date, windowsize, trend_maker) maker_results = save_trend_maker_es(topic, date, windowsize, trend_maker) #save_trend_pusher(topic, date, windowsize, trend_pusher) pusher_results = save_trend_pusher_es(topic, date, windowsize, trend_pusher) return maker_results, pusher_results