import csv


def performEventDurationTest():
    # Load the precomputed outlier scores, global pace, and z-score time series
    mahal_timeseries = readOutlierScores("results/outlier_scores.csv")
    global_pace_timeseries = readGlobalPace("4year_features")
    zscore_timeseries = readZScoresTimeseries("results/zscore.csv")
    mahal_timeseries_fine = readOutlierScores(
        "results/link_20_normalize_outlier_scores.csv")

    threshold_vals = [.90, .91, .92, .93, .94, .95, .96, .97, .98, .99]
    window_sizes = [1, 2, 3, 4, 6, 8, 12, 24]

    # Sweep over window sizes and threshold quantiles, recording the detected
    # event duration around 2012-10-31 at both granularities
    with open('results/threshold_experiment.csv', 'w') as f:
        w = csv.writer(f)
        w.writerow(['granularity', 'window', 'threshold', 'duration'])
        for window_size in window_sizes:
            for threshold in threshold_vals:
                print(window_size, threshold)

                events = detectWindowedEvents(
                    mahal_timeseries, zscore_timeseries, global_pace_timeseries,
                    "results/events_windowed.csv",
                    window_size=window_size, threshold_quant=threshold)
                duration = getEventDuration(events, "2012-10-31")
                w.writerow(["coarse", window_size, threshold, duration])

                events = detectWindowedEvents(
                    mahal_timeseries_fine, zscore_timeseries, global_pace_timeseries,
                    "results/link_20_normalize_events_windowed.csv",
                    window_size=window_size, threshold_quant=threshold)
                duration = getEventDuration(events, "2012-10-31")
                w.writerow(["fine", window_size, threshold, duration])
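# Illustrative sketch only: getEventDuration() is defined elsewhere in the repo.
# Assuming each detected event row begins with (start_date, end_date, duration),
# matching the column order of the event CSVs written in this module, this would
# return the duration of the event covering the target date (e.g. 2012-10-31),
# or 0 if no detected event covers it. The row layout is an assumption.
def getEventDuration_sketch(events, target_date):
    for event in events:
        start_date, end_date, duration = event[0], event[1], event[2]
        # ISO-formatted date strings compare chronologically as plain strings
        if start_date <= target_date <= end_date:
            return duration
    return 0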
def process_events(outlier_score_file, feature_dir, output_file):
    # Load the outlier scores and global pace, then run HMM-based event detection
    mahal_timeseries, c_timeseries = readOutlierScores(outlier_score_file)
    global_pace_timeseries = readGlobalPace(feature_dir)
    events, predictions = detect_events_hmm(mahal_timeseries, c_timeseries,
                                            global_pace_timeseries)

    # Write the state predictions back alongside the original outlier scores
    new_scores_file = output_file.split(".")[0] + "_scores.csv"
    augment_outlier_scores(outlier_score_file, new_scores_file, predictions)

    # Write the detected events, leaving the event name column blank ('?')
    with open(output_file, 'w') as f:
        w = csv.writer(f)
        w.writerow(['event', 'start_date', 'end_date', 'duration', 'max_mahal',
                    'max_pace_dev', 'min_pace_dev'])
        for line in events:
            w.writerow(['?'] + line)
from functools import partial
from multiprocessing import Pool


def run_sims_in_parallel(outlier_score_file, feature_dir, output_file):
    # Use 8 worker processes, each running a batch of 1250 simulations
    pool = Pool(8)
    mahal_timeseries, c_timeseries = readOutlierScores(outlier_score_file)
    global_pace_timeseries = readGlobalPace(feature_dir)

    sim_function = partial(run_many_simulations,
                           mahal_timeseries=mahal_timeseries,
                           c_timeseries=c_timeseries,
                           global_pace_timeseries=global_pace_timeseries)
    sim_sizes = [1250] * 8
    result = pool.map(sim_function, sim_sizes)

    # Combine the simulated event tables from all workers into one CSV
    with open(output_file, 'w') as f:
        w = csv.writer(f)
        w.writerow(['event', 'start_date', 'end_date', 'duration',
                    'max_pace_dev', 'min_pace_dev'])
        for chunk in result:
            w.writerows(chunk)
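# Example invocation (the output filename is hypothetical; the score file and
# feature directory follow the paths used elsewhere in this module): runs 8
# batches of 1250 simulations in parallel and writes one combined event CSV.
# run_sims_in_parallel("results/outlier_scores.csv", "4year_features",
#                      "results/simulated_events.csv")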
def run_random_sims(outlier_score_file, feature_dir):
    mahal_timeseries, c_timeseries = readOutlierScores(outlier_score_file)
    global_pace_timeseries = readGlobalPace(feature_dir)

    # Run 50 simulations, each with randomly drawn HMM parameters, and save the
    # augmented score files for later comparison
    for p in range(50):
        print("Sim %d" % p)
        initial_state, trans_matrix, emission_matrix = randomly_draw_parameters()
        events, predictions = detect_events_hmm(mahal_timeseries, c_timeseries,
                                                global_pace_timeseries,
                                                threshold_quant=.95,
                                                trans_matrix=trans_matrix,
                                                emission_matrix=emission_matrix)

        new_scores_file = 'tmp_results/coarse_events_k%d_scores.csv' % p
        augment_outlier_scores(outlier_score_file, new_scores_file, predictions)
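# Illustrative sketch only: randomly_draw_parameters() lives elsewhere in the
# repo. Assuming a two-state HMM (normal / event) with binary emissions, one
# plausible version draws each distribution from a Dirichlet so every row sums
# to 1. The state/emission counts and the use of numpy are assumptions.
import numpy as np

def randomly_draw_parameters_sketch(num_states=2, num_emissions=2):
    # Each draw is a valid probability distribution (non-negative, sums to 1)
    initial_state = np.random.dirichlet(np.ones(num_states))
    trans_matrix = np.random.dirichlet(np.ones(num_states), size=num_states)
    emission_matrix = np.random.dirichlet(np.ones(num_emissions), size=num_states)
    return initial_state, trans_matrix, emission_matrix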
if __name__ == "__main__":
    #performEventDurationTest()

    # Run windowed event detection at the coarse granularity
    mahal_timeseries = readOutlierScores("results/outlier_scores.csv")
    global_pace_timeseries = readGlobalPace("4year_features")
    zscore_timeseries = readZScoresTimeseries("results/zscore.csv")
    detectWindowedEvents(mahal_timeseries, zscore_timeseries, global_pace_timeseries,
                         "results/events_windowed.csv",
                         window_size=8, threshold_quant=.95)

    # Repeat at the fine (link-level, normalized) granularity
    mahal_timeseries = readOutlierScores(
        "results/link_20_normalize_outlier_scores.csv")
    global_pace_timeseries = readGlobalPace("4year_features")
    zscore_timeseries = readZScoresTimeseries("results/zscore.csv")
    detectWindowedEvents(mahal_timeseries, zscore_timeseries, global_pace_timeseries,
                         "results/link_20_normalize_events_windowed.csv",
                         window_size=8, threshold_quant=.95)

    logMsg("done")
#Returns:
#A dictionary which contains the standardized pace vectors (as lists of floats),
#keyed by (date, hour, weekday)
def readZScoresTimeseries(filename):
    r = csv.reader(open(filename, "r"))
    next(r)  # skip the header row

    timeseries = {}
    for line in r:
        (date, hour, weekday) = line[0:3]
        hour = int(hour)
        timeseries[(date, hour, weekday)] = list(map(float, line[3:]))
    return timeseries


#########################################################################################################
################################### MAIN CODE BEGINS HERE ###############################################
#########################################################################################################
if __name__ == "__main__":
    # Read the previous results from file
    mahal_timeseries = readOutlierScores(OUTLIER_SCORE_FILE)
    global_pace_timeseries = readGlobalPace(FEATURE_DIR)
    zscore_timeseries = readZScoresTimeseries(ZSCORE_FILE)

    # Perform the event detection on the outlier scores, using extra info to describe the events.
    # Events are detected as the lowest values of R(t) according to threshold_quant, and events
    # less than 6 hours apart are merged.
    detectEventsSwitching(mahal_timeseries, zscore_timeseries, global_pace_timeseries,
                          OUT_UNFILTERED_EVENTS, OUT_FILTERED_EVENTS,
                          min_event_spacing=6, threshold_quant=.90)

    logMsg("Done.")