def preprocess():
    data = load_data.load_data()
    print("============================= Database loaded =============================")
    freqs = data[0]
    raw_signals = data[1]
    ids = data[2]
    signals = clean_signal.clean_signal(freqs, raw_signals)
    print("============================== Noises Removed =============================")
    preprocessed_data = {}
    for idx in range(len(signals)):
        signal = signals[idx]
        freq = freqs[idx]
        print("Detecting peaks in patient", idx + 1, "data", end='\r')
        peaks = peak_detection.detect_peaks(signal, freq)
        segmented_data = segment(signal, peaks)
        preprocessed_data[ids[idx]] = segmented_data
    return preprocessed_data
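# Usage sketch (not part of the original module): how the dictionary returned by
# preprocess() might be consumed. Assumes load_data, clean_signal, peak_detection
# and segment are importable exactly as referenced above.
preprocessed = preprocess()
for patient_id, segments in preprocessed.items():
    # Each entry maps a patient id to that patient's segmented signal.
    print(patient_id, len(segments))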
def test(sample_path):
    detection_params = {'w_s': 11,
                        'peak_radius': 4.,
                        'threshold': 40.,
                        'max_peaks': 4}
    sample = TiffFile(sample_path)
    curr_dir = os.path.dirname(__file__)
    fname = os.path.join(curr_dir, os.path.join(sample.fpath, sample.fname))
    arr = sample.asarray()
    peaks = detect_peaks(arr,
                         shape_label=('t', 'z', 'x', 'y'),
                         verbose=True,
                         show_progress=False,
                         parallel=True,
                         **detection_params)
    # del sample
    # sample = None
    gc.get_referrers(arr)
    del arr
    gc.collect()
def getPeaks():
    '''Fetch inflection (peak) point data.
    '''
    from peak_detection import detect_peaks
    limit = int(request.args.get('limit', 10))
    query = request.args.get('query', None)
    during = int(request.args.get('during', 24 * 3600))
    subevent_status = request.args.get('subevent', 'global')
    lis = request.args.get('lis', '')
    try:
        lis = [float(da) for da in lis.split(',')]
    except:
        lis = []
    ts_lis = request.args.get('ts', '')
    ts_lis = [float(da) for da in ts_lis.split(',')]
    new_zeros = detect_peaks(lis)
    time_lis = {}
    for idx, point_idx in enumerate(new_zeros):
        ts = ts_lis[point_idx]
        end_ts = ts
        time_lis[idx] = {'ts': end_ts * 1000, 'title': str(idx)}
    return json.dumps(time_lis)
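# Illustration only: one way a handler like getPeaks() could be exposed over HTTP.
# The Flask app object, route path, and example query string below are assumptions
# for this sketch, not part of the original code.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/peaks', 'getPeaks', getPeaks)

# Example request (made-up values):
#   GET /peaks?lis=1,5,2,8,3&ts=0,3600,7200,10800,14400
# The response maps each detected peak index to {'ts': <ms timestamp>, 'title': <index>}.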
def getPeaks():
    '''Fetch sentiment inflection (peak) point data.
    '''
    customized = request.args.get('customized', '1')
    limit = request.args.get('limit', 10)
    query = request.args.get('query', None)
    if query:
        query = query.strip()
    during = request.args.get('during', 24 * 3600)
    during = int(during)
    emotion = request.args.get('emotion', 'happy')
    lis = request.args.get('lis', '')
    try:
        lis = [float(da) for da in lis.split(',')]
    except:
        lis = []
    if not lis or not len(lis):
        return 'Null Data'
    ts_lis = request.args.get('ts', '')
    ts_lis = [float(da) for da in ts_lis.split(',')]
    new_zeros = detect_peaks(lis)
    search_method = 'topic'
    area = None
    search_func = getattr(keywordsModule, 'search_%s_keywords' % search_method, None)
    if not search_func:
        return json.dumps('search function undefined')
    title_text = {'happy': [], 'angry': [], 'sad': [], 'news': []}
    title = {'happy': 'A', 'angry': 'B', 'sad': 'C', 'news': 'D'}
    time_lis = {}
    for idx, point_idx in enumerate(new_zeros):
        print idx, point_idx
        ts = ts_lis[point_idx]
        end_ts = ts
        v = emotions_kv[emotion]
        time_lis[idx] = {
            'ts': end_ts * 1000,
            'title': title[emotion] + str(idx),
        }
    print 'peak:', time_lis
    return json.dumps(time_lis)
def get_interval_count(topic, date, windowsize):
    results = []
    ts_list = []
    start_date = ts2datetime(datetime2ts(date) - windowsize * Day)
    unit = 900
    print 'start_date:', start_date
    start_ts = datetime2ts(start_date)
    ts_list = [start_ts]
    end_ts = datetime2ts(date)
    # `during` (the interval length in seconds) is expected to come from module scope.
    interval = (end_ts - start_ts) / during
    print 'interval:', interval
    print topic
    if MYSQL_TOPIC_LEN == 0:
        topic0 = topic[:20]
    else:
        topic0 = topic
    for i in range(interval, 0, -1):
        begin_ts = end_ts - during * i
        over_ts = begin_ts + during
        ts_list.append(over_ts)
        items = db.session.query(PropagateCount).filter(PropagateCount.topic==topic0,
                                                        PropagateCount.end<=over_ts,
                                                        PropagateCount.end>begin_ts,
                                                        PropagateCount.range==unit).all()
        if items:
            result = len(items)
        else:
            result = 0
        results.append(float(result))
    print 'detect_peak_bottom_line::', results
    new_zeros = detect_peaks(results)    # indices of the time intervals where peaks occur
    new_bottom = detect_bottom(results)  # get the first bottom
    print 'new_peaks:', new_zeros
    print 'new_bottom:', new_bottom
    # save the trend time range
    # save_peak_bottom(new_zeros, new_bottom)
    # trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list, topic_xapian_id)
    trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list)
    print 'trend_makers:', trend_maker
    trend_pusher = get_pushers(topic, new_zeros, new_bottom, ts_list)
    print 'trend_pushers:', trend_pusher
    save_trend_maker(topic, date, windowsize, trend_maker)
    save_trend_pusher(topic, date, windowsize, trend_pusher)
    return trend_maker, trend_pusher
def f(self, index, count, beta, width):
    try:
        k = (float(index) + 1.0) / float(count) * 100.0
        self.schema["data"][index]["output_data"] = \
            peak_detection.detect_peaks(self.schema["data"][index]["input_data"], beta, width)
        self.ui.progress.setValue(k)
        if k == 100:
            # "Выполнено" / "Преобразование выполнено": "Done" / "Transformation completed"
            self.api.send_system_message("Выполнено", "Преобразование выполнено")
            self.set_list_files()
        QApplication.processEvents()
    except Exception as err:
        # "Ошибка" / "Не удалось преобразовать данные": "Error" / "Failed to transform the data"
        self.api.send_system_message("Ошибка", "Не удалось преобразовать данные")
def fit_peaks(spectrum, wavenumbers):
    diff_spectrum = pd.corrected_diff_spectrum(spectrum)
    noise_bin, noise_mean, noise_stdd = sf.get_moving_noise(spectrum)
    peak_results = pd.detect_peaks(spectrum, diff_spectrum)
    combined_spectrum = np.zeros_like(wavenumbers)
    peak_positions = peak_results["peaks"]
    peak_prom = peak_results["prom"]
    peak_widths = peak_results["widths"]
    # One row per detected peak: position, prominence, width.
    first_pass = np.hstack((np.array([peak_positions]).T,
                            np.array([peak_prom]).T,
                            np.array([peak_widths]).T))
    sigma = 2 * np.ones_like(first_pass)
    peak_list = []
    for peak in peak_positions:
        pass
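# The per-peak loop in fit_peaks is left as a stub. Below is a minimal sketch of one
# way it might be completed: fitting an independent Gaussian to each detected peak
# with SciPy. The helper names, the (position, prominence, width) column layout of
# first_pass, and the use of curve_fit are assumptions for illustration, not the
# original implementation.
import numpy as np
from scipy.optimize import curve_fit


def gaussian(x, amplitude, center, sigma):
    """Single Gaussian component."""
    return amplitude * np.exp(-0.5 * ((x - center) / sigma) ** 2)


def fit_single_peaks(spectrum, wavenumbers, first_pass):
    """Fit one Gaussian per (position, prominence, width) row of first_pass."""
    fitted_params = []
    for position, prominence, width in first_pass:
        # Initial guess: prominence as amplitude, peak position mapped to a
        # wavenumber, and a width-based sigma (floored to avoid zero).
        p0 = [prominence, wavenumbers[int(position)], max(float(width), 1.0)]
        try:
            params, _ = curve_fit(gaussian, wavenumbers, spectrum, p0=p0)
            fitted_params.append(params)
        except RuntimeError:
            # curve_fit failed to converge for this peak; skip it.
            continue
    return fitted_params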
def get_interval_count(topic, date, windowsize, topic_xapian_id):
    results = [0]
    ts_list = []
    start_date = ts2datetime(datetime2ts(date) - windowsize * Day)
    unit = 900  # interval length used in the PropagateCount.range filter below
    print 'start_date:', start_date
    start_ts = datetime2ts(start_date)
    ts_list = [start_ts]
    end_ts = datetime2ts(date)
    interval = (end_ts - start_ts) / during
    print 'interval:', interval
    for i in range(interval, 0, -1):
        begin_ts = end_ts - during * i
        over_ts = begin_ts + during
        ts_list.append(over_ts)
        items = db.session.query(PropagateCount).filter(PropagateCount.topic==topic,
                                                        PropagateCount.end<=over_ts,
                                                        PropagateCount.end>begin_ts,
                                                        PropagateCount.range==unit).all()
        if items:
            result = Merge_propagate(items)
        else:
            result = 0
        results.append(float(result))
    print 'detect_peak_bottom_line::', results
    new_zeros = detect_peaks(results)    # indices of the time intervals where peaks occur
    new_bottom = detect_bottom(results)  # get the first bottom
    print 'new_peaks:', new_zeros
    print 'new_bottom:', new_bottom
    # save the trend time range
    # save_peak_bottom(new_zeros, new_bottom)
    trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list, topic_xapian_id)
    print 'trend_makers:', trend_maker
    trend_pusher = get_pushers(topic, new_zeros, new_bottom, ts_list, topic_xapian_id)
    print 'trend_pushers:', trend_pusher
    save_trend_maker(topic, date, windowsize, trend_maker, topic_xapian_id)
    save_trend_pusher(topic, date, windowsize, trend_pusher, topic_xapian_id)
    return trend_maker, trend_pusher
def trend_user(topic, start_ts, end_ts, news_collection, comment_collection):
    '''Reuse the results already computed by the temporal-analysis module.
    '''
    ts_list, count_list = get_interval_count(topic, start_ts, end_ts)
    # peaks
    new_peaks = detect_peaks(count_list)
    print 'news_peaks:', new_peaks
    # troughs
    new_bottom = detect_bottom(count_list)
    print 'news_bottom:', new_bottom
    # trend_maker
    trend_maker = get_maker(topic, new_peaks, new_bottom, ts_list, news_collection)
    print 'len(trend_maker):', len(trend_maker)
    # trend_pusher
    trend_pusher = get_pusher(topic, new_peaks, new_bottom, ts_list, news_collection, comment_collection)
    print 'len(trend_pusher):', len(trend_pusher)
    save_trend_maker(topic, start_ts, end_ts, trend_maker)
    save_trend_pusher(topic, start_ts, end_ts, trend_pusher)
def PropagatePeakNews():
    limit = request.args.get('limit', 10)
    topic = request.args.get('topic', None)
    if topic:
        topic = topic.strip()
    during = request.args.get('during', 900)
    during = int(during)
    mtype = request.args.get('mtype', '')
    mtype = int(mtype)
    lis = request.args.get('lis', '')
    try:
        lis = [float(da) for da in lis.split(',')]
    except:
        lis = []
    if not lis or not len(lis):
        return 'Null Data'
    ts_lis = request.args.get('ts', '')
    ts_lis = [float(da) for da in ts_lis.split(',')]
    new_zeros = detect_peaks(lis)
    title_text = {'origin': [], 'forward': [], 'total': []}
    title = {'1': 'A', '2': 'B', '3': 'C', '4': 'D'}
    time_lis = {}
    for idx, point_idx in enumerate(new_zeros):
        # print idx, point_idx
        ts = ts_lis[point_idx]
        end_ts = ts
        v = mtype
        time_lis[idx] = {
            'ts': end_ts * 1000,
            'title': title[str(mtype)] + str(idx),
        }
    return json.dumps(time_lis)
import sys sys.path.append("..") from peak_detection import detect_peaks from tifffile import TiffFile fname = 'sample.tif' detection_parameters = { 'w_s': 10, 'peak_radius': 4., 'threshold': 60., 'max_peaks': 10 } sample = TiffFile(fname) peaks = detect_peaks(sample.asarray(), shape_label=('t', 'z', 'x', 'y'), parallel=True, verbose=True, show_progress=False, **detection_parameters) for id, p in peaks.groupby(level="stacks"): print p.shape[0]
def get_interval_count(topic, date, windowsize):
    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    results = []
    ts_list = []
    start_date = ts2datetime(datetime2ts(date) - windowsize * Day)
    unit = 900
    print 'start_date:', start_date
    start_ts = datetime2ts(start_date)
    ts_list = [start_ts]
    end_ts = datetime2ts(date)
    interval = (end_ts - start_ts) / during
    print 'interval:', interval
    print topic
    '''
    if MYSQL_TOPIC_LEN == 0:
        topic0 = topic[:20]
    else:
        topic0 = topic
    '''
    for i in range(interval, 0, -1):
        begin_ts = long(end_ts) - during * i
        over_ts = begin_ts + during
        ts_list.append(over_ts)
        '''
        items = db.session.query(PropagateCount).filter(PropagateCount.topic==topic0,
                                                        PropagateCount.end<=over_ts,
                                                        PropagateCount.end>begin_ts,
                                                        PropagateCount.range==unit).all()
        '''
        '''
        query_body = {
            'query': {
                'bool': {
                    'must': [
                        {'range': {'end_ts': {'gt': begin_ts, 'lte': over_ts}}},
                        {'term': {'en_name': topic0}},
                        {'term': {'range': unit}}
                    ]
                }
            },
            'size': 1000000  # cap on the number of returned hits; to be removed
        }
        items = weibo_es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
        '''
        query_body = {
            'query': {
                'bool': {
                    'must': [{'term': {'en_name': topic}}]
                }
            },
            'size': 1000000
        }
        es_results = weibo_es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
        print 'len_results:::::::::::', len(es_results)
        count = 0
        for result in es_results:
            result = result['_source']
            time_results = json.loads(result['time_results'])
            count_results = time_results['count']
            print 'type_time_results:::::::', type(time_results)
            time_time = sorted(time_results.keys())
            print 'time_results.keys:::::', time_time
            if time_results['during'] == unit:
                print 'count_results.keys():::::', count_results.keys()
                for end_ts_count in count_results.keys():
                    # JSON keys are strings, so cast before comparing timestamps.
                    if int(end_ts_count) > begin_ts and int(end_ts_count) <= over_ts:
                        count += 1
        '''
        if items:
            result = len(items)
        else:
            result = 0
        results.append(float(result))
        '''
        '''
        if count:
            result = count
        else:
            result = 0
        '''
        results.append(float(count))
        print 'results::::::::::', results
    print 'detect_peak_bottom_line::::::', results
    new_zeros = detect_peaks(results)    # indices of the time intervals where peaks occur
    new_bottom = detect_bottom(results)  # get the first bottom
    print 'new_zeros:::::::::::::::::', new_zeros
    print 'new_bottom::::::::::::::::', new_bottom
    print 'ts_list:::::::::::::::::::', ts_list
    # save the trend time range
    # save_peak_bottom(new_zeros, new_bottom)
    # trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list, topic_xapian_id)
    trend_maker = get_makers(topic, new_zeros, new_bottom, ts_list)
    print 'trend_makers:', trend_maker
    trend_pusher = get_pushers(topic, new_zeros, new_bottom, ts_list)
    print 'trend_pushers:', trend_pusher
    # save_trend_maker(topic, date, windowsize, trend_maker)
    maker_results = save_trend_maker_es(topic, date, windowsize, trend_maker)
    # save_trend_pusher(topic, date, windowsize, trend_pusher)
    pusher_results = save_trend_pusher_es(topic, date, windowsize, trend_pusher)
    return maker_results, pusher_results
import sys sys.path.append("..") from peak_detection import detect_peaks from peak_detection import show_peaks from tifffile import TiffFile fname = 'sample.tif' detection_parameters = {'w_s': 10, 'peak_radius': 4., 'threshold': 60., 'max_peaks': 10 } sample = TiffFile(fname) arr = sample.asarray() peaks = detect_peaks(arr, shape_label=('t', 'z', 'x', 'y'), parallel=True, verbose=True, show_progress=False, **detection_parameters) for id, p in peaks.groupby(level="stacks"): print((p.shape[0])) show_peaks(arr, peaks, 3)
def get_user_comment_retweet(task_exist):
    result = {}  # result = {'uid1_comment': {ts: value}, 'uid1_retweet': {ts: value}, 'uid2_comment': ...}
    submit_date = task_exist['submit_date']
    start_ts = date2ts(submit_date)
    task_status = task_exist['status']
    if task_status == 1:
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        now_date_ts = datetime2ts(now_date)
        segment = int((now_ts - now_date_ts) / 900) + 1
        end_ts = now_date_ts + segment * 900
        # test
        end_ts = datetime2ts('2013-09-02')
    else:
        end_ts = date2ts(task_exist['end_date'])
    task_user = task_exist['uid_list']
    select_top_dict = {}  # {uid: [ave_retweet_count, ave_peak_retweet_count]}
    # select the union of top5 ave_retweet_count and top5 ave_peak_retweet_count
    for user in task_user:
        result[user+'_comment'] = {}
        result[user+'_retweet'] = {}
        comment_retweet_dict = monitor_r.hgetall(user)
        for item in comment_retweet_dict:
            item_type_ts = item.split('_')
            item_type = item_type_ts[0]
            item_ts = item_type_ts[1]
            result[user+'_'+item_type][item_ts] = int(comment_retweet_dict[item])
        # use to detect peaks
        comment_dict = result[user+'_comment']
        complement_comment_dict = complement_ts(comment_dict, start_ts, end_ts)
        sort_comment_dict = sorted(complement_comment_dict.items(), key=lambda x: int(x[0]))
        detect_peaks_comment_input = [item[1] for item in sort_comment_dict]
        # print 'detect_peaks_comment_input:', detect_peaks_comment_input
        result[user+'_comment_peak'] = detect_peaks(detect_peaks_comment_input)
        retweet_dict = result[user+'_retweet']
        complement_retweet_dict = complement_ts(retweet_dict, start_ts, end_ts)
        sort_retweet_dict = sorted(complement_retweet_dict.items(), key=lambda x: int(x[0]))
        detect_peaks_retweet_input = [item[1] for item in sort_retweet_dict]
        result[user+'_retweet_peak'] = detect_peaks(detect_peaks_retweet_input)
        ave_retweet_count = sum(detect_peaks_retweet_input) / len(detect_peaks_retweet_input)
        peak_count_list = [detect_peaks_retweet_input[peak_location] for peak_location in result[user+'_retweet_peak']]
        ave_peak_count = sum(peak_count_list) / len(peak_count_list)
        select_top_dict[user] = [ave_retweet_count, ave_peak_count]
    # select the union of the two top5 lists
    sort_select_top_count_dict = sorted(select_top_dict.items(), key=lambda x: x[1][0], reverse=True)
    top5_count_user_list = sort_select_top_count_dict[:5]
    top5_count_user = [item[0] for item in top5_count_user_list]
    sort_select_top_peak_dict = sorted(select_top_dict.items(), key=lambda x: x[1][1], reverse=True)
    top5_peak_user_list = sort_select_top_peak_dict[:5]
    top5_peak_user = [item[0] for item in top5_peak_user_list]
    union_user = list(set(top5_count_user) & set(top5_peak_user))
    new_result = {}
    for user in union_user:
        new_result[user+'_retweet'] = result[user+'_retweet']
        new_result[user+'_retweet_peak'] = result[user+'_retweet_peak']
        new_result[user+'_comment'] = result[user+'_comment']
        new_result[user+'_comment_peak'] = result[user+'_comment_peak']
    new_result['profile'] = get_top_user_profile(union_user)
    # compute abnormal index
    new_result['abnormal_index'] = compute_comment_retweet_abnormal(new_result, union_user)
    return new_result
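# The complement_ts -> sort-by-timestamp -> detect_peaks sequence above is applied
# to every per-user series. A small standalone sketch of that pattern (the helper
# name is hypothetical; complement_ts and detect_peaks are the functions used above):
def series_peaks(ts_value_dict, start_ts, end_ts):
    """Fill missing timestamps, order the series, and return (peak indices, ordered series)."""
    filled = complement_ts(ts_value_dict, start_ts, end_ts)
    ordered = sorted(filled.items(), key=lambda x: int(x[0]))
    values = [value for _, value in ordered]
    return detect_peaks(values), ordered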
        except:
            result['hashtag_'+sensitive][timestamp_date][hashtag] = hashtag_dict[hashtag]
        begin_ts += search_time_segment
    # compute peak for count/sentiment/sensitive_score
    peak_compute_field = ['count_0', 'count_1', 'sentiment_0_126', 'sentiment_0_127', 'sentiment_0_128',
                          'sentiment_0_129', 'sentiment_0_130', 'sensitive_score',
                          'sentiment_1_126', 'sentiment_1_127', 'sentiment_1_128', 'sentiment_1_129',
                          'sentiment_1_130']
    # print 'compute_peak'
    for field in peak_compute_field:
        complement_item = complement_ts(result[field], start_ts, end_ts)
        sort_complement_item = sorted(complement_item.items(), key=lambda x: int(x[0]))
        detect_peaks_input = [item[1] for item in sort_complement_item]
        # print 'start_detect_peaks'
        result[field+'_peak'] = detect_peaks(detect_peaks_input)
        result[field] = sort_complement_item
    # compute abnormal evaluate
    abnormal_index_dict = compute_abnormal(result)
    # print 'abnormal_index_dict:', abnormal_index_dict
    result = dict(result, **abnormal_index_dict)
    # sort geo dict result by date
    geo_dict_0 = result['geo_0']
    sort_geo_dict_0 = sorted(geo_dict_0.items(), key=lambda x: datetime2ts(x[0]))
    result['geo_0'] = sort_geo_dict_0
    geo_dict_1 = result['geo_1']
    sort_geo_dict_1 = sorted(geo_dict_1.items(), key=lambda x: datetime2ts(x[0]))
    result['geo_1'] = sort_geo_dict_1
    # sort hashtag dict by date
    hashtag_dict_0 = result['hashtag_0']
def getPeaks():
    '''Fetch sentiment inflection (peak) point data.
    '''
    customized = request.args.get('customized', '1')
    limit = request.args.get('limit', 10)
    query = request.args.get('query', None)
    if query:
        query = query.strip()
    during = request.args.get('during', 24 * 3600)
    during = int(during)
    area = request.args.get('area', 'global')
    emotion = request.args.get('emotion', 'happy')
    lis = request.args.get('lis', '')
    try:
        lis = [float(da) for da in lis.split(',')]
    except:
        lis = []
    if not lis or not len(lis):
        return 'Null Data'
    ts_lis = request.args.get('ts', '')
    ts_lis = [float(da) for da in ts_lis.split(',')]
    new_zeros = detect_peaks(lis)
    if area == 'global':
        search_method = 'global'
        if query:
            search_method = 'topic'
            area = None
    else:
        search_method = 'domain'
    search_func = getattr(keywordsModule, 'search_%s_keywords' % search_method, None)
    if not search_func:
        return json.dumps('search function undefined')
    title_text = {'happy': [], 'angry': [], 'sad': []}
    title = {'happy': 'A', 'angry': 'B', 'sad': 'C'}
    time_lis = {}
    for i in range(0, len(ts_lis)):
        if i in new_zeros:
            ts = ts_lis[i]
            begin_ts = ts - during
            end_ts = ts
            v = emotions_kv[emotion]
            # keywords_with_count = search_func(end_ts, during, v, query=query, domain=area, top=limit, customized=customized)
            # text = ','.join([k for k, v in keywords_with_count.iteritems()])
            time_lis[i] = {
                'ts': end_ts * 1000,
                'title': title[emotion] + str(new_zeros.index(i)),
                # 'text': text
            }
    return json.dumps(time_lis)