def main():
    """Load recent spectrum samples from Postgres, binarize per-channel
    occupancy against a noise-floor-derived threshold, and plot the results.

    Side effects: opens a DB connection, draws matplotlib subplots, and
    prints the enumerated bursts for channel 8.
    """
    connection = pg.connect("dbname = rem user = wireless password = wireless")
    df = psql.read_sql(
        "select occ, noise_floor, timetag from spectruminfo "
        "order by timetag DESC LIMIT 1000",
        connection)

    tempocc = df['occ'].values
    tempnf = df['noise_floor'].values

    # Each row holds a 16-element vector (one value per channel); unpack the
    # object arrays into dense (n_rows, 16) float arrays.
    occ = np.zeros((df.shape[0], 16))
    nf = np.zeros((df.shape[0], 16))
    # NOTE: was `range(0, len(occ)-1)`, which silently dropped the last row.
    for i in range(len(occ)):
        occ[i, :] = np.copy(np.array(tempocc[i]))
        nf[i, :] = np.copy(np.array(tempnf[i]))

    # Raw occupancy before thresholding.
    plt.subplot(411)
    for i in range(195, 210):
        plt.plot(occ[i, :])
    plt.subplot(412)
    plt.plot(occ[:, 6])
    plt.subplot(413)
    plt.plot(occ[:, 13])
    plt.subplot(414)
    plt.plot(occ[:, 12])

    for i in range(16):
        thr = np.mean(nf[:, i])
        print(10.0 / np.fabs(thr))
        # Binarize: occupancy above the noise-floor threshold -> 1, the
        # remainder (anything below 0.9) -> 0.  scipy.stats.threshold was
        # removed from SciPy; np.where is the supported equivalent.
        occ[:, i] = np.where(occ[:, i] > 10.0 / np.fabs(thr), 1.0, occ[:, i])
        occ[:, i] = np.where(occ[:, i] < 0.9, 0.0, occ[:, i])

    # Overlay the thresholded traces on the same subplots for comparison.
    plt.subplot(413)
    plt.plot(occ[:, 13])
    plt.subplot(412)
    plt.plot(occ[:, 8])
    plt.subplot(414)
    plt.plot(occ[:, 1])

    print(bd.enumerate_bursts(occ[:, 8], 'burstLabel'))
    plt.show()
def find_bursts(d, all_r, word_list):
    """Run Kleinberg burst detection for every word and return all bursts.

    :param d: total event counts per timepoint (baseline series)
    :param all_r: DataFrame of per-word target counts, one column per word
    :param word_list: words (column names of all_r) to analyse
    :returns: DataFrame of weighted bursts, sorted by descending weight
    """
    s = 2           # resolution of state jumps; higher s --> fewer but stronger bursts
    gam = 0.5       # difficulty of moving up a state; larger --> less bursty
    n = len(d)      # number of timepoints
    smooth_win = 5

    # Collect per-word frames and concatenate once at the end;
    # DataFrame.append was removed in pandas 2.0.
    frames = []
    for i, word in enumerate(word_list):
        r = all_r.loc[:, word].astype(int)

        # Find the optimal state sequence (Viterbi algorithm).
        # NOTE(review): burst_detection returns a (smoothed) d that replaces
        # the loop-carried d for subsequent words — preserved as-is, but
        # confirm this reuse is intentional.
        [q, d, r, p] = bd.burst_detection(r, d, n, s, gam, smooth_win)

        # Enumerate the bursts and weight each one.
        bursts = bd.enumerate_bursts(q, word)
        frames.append(bd.burst_weights(bursts, r, d, p))

        # Progress report every 100 words.
        if np.mod(i, 100) == 0:
            print('total words', len(word_list), 'word', i, 'complete')

    all_bursts = pd.concat(
        [pd.DataFrame(columns=['begin', 'end', 'weight'])] + frames,
        ignore_index=True)
    return all_bursts.sort_values(by='weight', ascending=False)
def detect_bursts_of_a_timeseries(self, timeseries_df, gamma=None):
    '''
    detect intervals with bursts of activity: [start_timestamp, end_timestamp)

    :param timeseries_df: timeseries_df for a single platform
    :param gamma: difficulty associated with moving up a state (input to
        burst_detection library); if None, a gamma is predicted from the
        series when it has enough activity (max count >= 5), otherwise
        detection is skipped and None is returned.
    :returns: DataFrame of bursts with start/end timestamps, or None when
        the series is too short/quiet or no burst was found.

    burst_detection library: https://pypi.org/project/burst_detection/
    '''
    if len(timeseries_df) < 2:
        return None

    r = timeseries_df[self.id_col].values
    n = len(r)
    d = np.array([sum(r)] * n, dtype=float)

    # FIX: previously an explicitly supplied gamma fell into the else-branch
    # and the function returned None, so the parameter could never be used.
    if gamma is None:
        if np.max(r) < 5:
            # Too little activity to predict a gamma reliably.
            return None
        gamma = self.predict_gamma_for_timeseries(timeseries_df)
        with open('predicted_gammas.csv', 'a') as f:
            f.write(self.content_id + ',' + str(gamma) + '\n')

    q = bd.burst_detection(r, d, n, s=2, gamma=gamma, smooth_win=1)[0]
    # Returns a df with 'begin' and 'end' columns for a burst, where both
    # begin and end indices are included.
    bursts_df = bd.enumerate_bursts(q, 'burstLabel')

    index_date = pd.Series(
        timeseries_df[self.timestamp_col].values,
        index=timeseries_df.index).to_dict()
    # Assumes a 0-based consecutive index and evenly spaced timestamps —
    # TODO(review): confirm against callers.
    time_granularity = index_date[1] - index_date[0]
    bursts_df['start_timestamp'] = bursts_df['begin'].map(index_date)
    bursts_df['end_timestamp'] = bursts_df['end'].map(index_date)
    # End bound is exclusive: push it one granularity step past the last
    # included timepoint.
    bursts_df['end_timestamp'] = bursts_df['end_timestamp'] + time_granularity
    if len(bursts_df) > 0:
        return bursts_df
def detect_bursts(company_date_abstract, topics_list):
    """For every company, detect bursts of topic-keyword activity in its
    dated abstracts and write the results to 'bursts_by_topic.txt';
    detection errors are recorded in 'bursts_errors.txt'.

    :param company_date_abstract: mapping of company -> list of
        (ordinal_date, [abstracts]) tuples
    :param topics_list: list of topics, each a list of keywords
    """
    full_save_string = ""
    err_string = ""
    for company in company_date_abstract:
        co_bursts_str = ""
        co_list = company_date_abstract[company]
        for i, topic in enumerate(topics_list):
            bursts_string = ""
            r = []   # target events per date (abstracts matching the topic)
            d = []   # total abstracts per date
            for date_abs in co_list:
                abs_list = date_abs[1]
                d.append(len(abs_list))
                target_events = 0
                for ab in abs_list:
                    # An abstract counts once per topic, however many
                    # keywords it matches.
                    for keyword in topic:
                        if keyword in ab:
                            target_events += 1
                            break
                r.append(target_events)
            n = len(r)
            if all(elem == 0 for elem in r):
                continue
            try:
                # I think the error here is that s = 2 and for 1x2 arrays of
                # r = [1,0] and [1,1] respectively, p[0] = 1/2 so then p=1
                # (line 60 of burst_detection) which causes an error in line
                # 29 of init in burst_detection. Unsure if this is the error
                # since I can't replicate on my console.
                q, d, r, p = bd.burst_detection(r, d, n, s=1.5, gamma=1.0,
                                                smooth_win=1)
            except ValueError as e:
                # FIX: previously the failing inputs were formatted but never
                # appended, so bursts_errors.txt was always empty.
                err_string += '{} topic {}: {} r={} d={}\n'.format(
                    company, i, repr(e), r, d)
                continue
            except Exception as e:
                print('Error: ' + repr(e))
                continue
            bursts = bd.enumerate_bursts(q, 'burstLabel')
            weighted_bursts = bd.burst_weights(bursts, r, d, p)
            if weighted_bursts.empty:
                continue
            kw_str = 'weighted bursts for topic no. ' + str(i) + ':' + '\n'
            bursts_string = kw_str + str(weighted_bursts) + '\n'
            beg_list = weighted_bursts['begin']
            end_list = weighted_bursts['end']
            # FIX: inner loop previously reused `i`, shadowing the topic index.
            for j in range(len(beg_list)):
                start_index = beg_list[j]
                end_index = end_list[j]
                start_date = datetime.date.fromordinal(
                    int(co_list[start_index][0]))
                end_date = datetime.date.fromordinal(
                    int(co_list[end_index][0]))
                date_str = '{} Start: {} End: {}\n\n'.format(
                    j, start_date, end_date)
                bursts_string = bursts_string + date_str
            co_bursts_str = co_bursts_str + bursts_string
        if co_bursts_str != "":
            co_bursts_str = company[0].upper() + '\n' + co_bursts_str
            full_save_string += co_bursts_str
    with open('bursts_by_topic.txt', 'w') as f:
        f.write(full_save_string)
    with open('bursts_errors.txt', 'w') as f:
        f.write(err_string)
def get_bursts(topics_list, date_abs):
    """Detect per-keyword bursts across dated abstracts (no company split)
    and write the accumulated report to 'bursts_no_company.txt'.

    :param topics_list: list of topics, each a list of keywords
    :param date_abs: list of (ordinal_date, [abstracts]) tuples
    """
    bursts_string = ""
    for topic in topics_list:
        print('Topic')
        for keyword in topic:
            print(keyword)
            r = []   # abstracts per date containing this keyword
            d = []   # total abstracts per date
            for date_abs_tuple in date_abs:
                orddate = date_abs_tuple[0]
                abs_list = date_abs_tuple[1]
                target_events = 0
                d.append(len(abs_list))
                for abstract in abs_list:
                    if keyword in abstract:
                        target_events += 1
                r.append(target_events)
            n = len(r)
            if all(elem == 0 for elem in r):
                continue
            print('calculating the bursts')
            try:
                q, d, r, p = bd.burst_detection(r, d, n, s=2.2, gamma=1.0,
                                                smooth_win=1)
            except Exception as e:
                print('Error: ' + repr(e))
                continue
            bursts = bd.enumerate_bursts(q, 'burstLabel')
            weighted_bursts = bd.burst_weights(bursts, r, d, p)
            if weighted_bursts.empty:
                continue
            kw_str = 'weighted bursts for ' + keyword + ':' + '\n'
            # FIX: previously `bursts_string = kw_str + ...` overwrote the
            # accumulator, so only the last bursty keyword reached the file.
            bursts_string += kw_str + str(weighted_bursts) + '\n'
            beg_list = weighted_bursts['begin']
            end_list = weighted_bursts['end']
            for i in range(len(beg_list)):
                start_index = beg_list[i]
                end_index = end_list[i]
                start_date = datetime.date.fromordinal(
                    int(date_abs[start_index][0]))
                end_date = datetime.date.fromordinal(
                    int(date_abs[end_index][0]))
                date_str = '{} Start: {} End: {}\n\n'.format(
                    i, start_date, end_date)
                bursts_string = bursts_string + date_str
    with open('bursts_no_company.txt', 'w') as f:
        f.write(bursts_string)
def get_total_events(tx, token_address):
    """Bucket a token's transfer events into day bins, run burst detection,
    and append the first two state probabilities to 'burst_probability.csv'.

    :param tx: neo4j transaction/session supporting .run()
    :param token_address: token contract address to query transfers for
    :returns: DataFrame of enumerated bursts

    Side effects: sets globals `time_list` (read) and `first_timestamp`
    (written); appends one line to burst_probability.csv.
    """
    global time_list
    global first_timestamp
    events = []
    i = 0
    timestamp = 0
    day_event = 0
    first_timestamp = 0
    for row in tx.run(
            "match (n:NODE)-[t:TOKEN_TRANSFER]->(m:NODE{address:$token_address}) "
            "where t.time <= 1546214400 "
            "return t.time as time order by t.time",
            token_address=token_address):
        if i == 0:
            # Align the first day bin to the precomputed global day grid.
            index = bisect.bisect_left(time_list, row["time"])
            timestamp = time_list[index]
            first_timestamp = timestamp
        i += 1
        if row["time"] <= timestamp:
            day_event += 1
        else:
            # Close the current day bin, then emit empty bins for any
            # day-long gaps before this event's day.
            events.append(day_event)
            day_event = 0
            timestamp += 86400
            while (row["time"] > timestamp):
                events.append(0)
                timestamp += 86400
            day_event += 1
    events.append(day_event)

    # Constant baseline slightly above the busiest day.
    maximum_day_event = max(events) + 10
    total_events = []
    for i in range(len(events)):
        total_events.append(maximum_day_event)

    r = np.array(events, dtype=float)
    d = np.array(total_events, dtype=float)
    n = len(r)
    q, d, r, p = bd.burst_detection(r, d, n, s=1.75, gamma=1, smooth_win=3)
    # FIX: file was previously opened without ever being closed.
    with open("burst_probability.csv", "a+") as file1:
        file1.write("{0} {1} {2}\n".format(token_address, p[0], p[1]))
    bursts = bd.enumerate_bursts(q, 'burstLabel')
    return bursts
def detect_bursts(self, s=2, gamma=0.5):
    '''
    detect intervals with bursts of activity: [begin_timestamp, end_timestamp)

    :param s: multiplicative distance between states (input to burst_detection library)
    :param gamma: difficulty associated with moving up a state (input to burst_detection library)

    burst_detection library: https://pypi.org/project/burst_detection/
    '''
    counts = self.counts_df[self.id_col].values
    n_points = len(counts)
    # Constant baseline: every timepoint shares the series total.
    baseline = np.array([sum(counts)] * n_points, dtype=float)

    # Optimal state sequence, then its contiguous bursty stretches.
    states = bd.burst_detection(counts, baseline, n_points, s, gamma, 1)[0]
    bursts = bd.enumerate_bursts(states, 'burstLabel')

    # Map dataframe row indices back to their timestamps.
    idx_to_ts = pd.Series(self.counts_df[self.timestamp_col].values,
                          index=self.counts_df.index).to_dict()
    bursts['begin_timestamp'] = bursts['begin'].map(idx_to_ts)
    bursts['end_timestamp'] = bursts['end'].map(idx_to_ts)

    # Half-open intervals: extend each end by one timestep.
    step = idx_to_ts[1] - idx_to_ts[0]
    self.burst_intervals = [
        (row['begin_timestamp'], row['end_timestamp'] + step)
        for _, row in bursts.iterrows()
    ]
    self.update_with_burst()