def saveEvents(timeSegments, mahal_timeseries, zscore_timeseries, global_pace_timeseries,
               out_file, sorted_mahal=None, mahal_threshold=None):
    """Compute the properties of every flagged time segment and write them to a CSV file.

    Params:
        timeSegments - iterable of segments. Each segment exposes state, start_id
            and end_id; the container itself exposes sorted_dates, which the two
            ids index into to obtain the event's start and end keys.
        mahal_timeseries, zscore_timeseries, global_pace_timeseries - forwarded
            unchanged to computeEventProperties(); global_pace_timeseries is also
            passed to getExpectedPace().
        out_file - path of the CSV file to (over)write.
        sorted_mahal, mahal_threshold - forwarded unchanged to computeEventProperties().
    Returns:
        None. The only output is the CSV file.
    """
    #Compute expected pace and variance (only the expected pace is used here)
    (expected_pace_timeseries, sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    eventList = []
    #Iterate through the TimeSegments
    for segment in timeSegments:
        #Only segments whose state compares equal to True are events
        if segment.state == True:
            start_key = timeSegments.sorted_dates[segment.start_id]
            end_key = timeSegments.sorted_dates[segment.end_id]

            #Compute event properties
            event = computeEventProperties(start_key, end_key, mahal_timeseries,
                                           global_pace_timeseries, expected_pace_timeseries,
                                           zscore_timeseries, sorted_mahal=sorted_mahal,
                                           mahal_threshold=mahal_threshold)
            eventList.append(event)

    #Sort events by element 5, in descending order. In the column layout written
    #below, element 5 is hours_above_thresh (duration is element 4).
    #NOTE(review): the previous comment said "duration" - confirm which is intended.
    eventList.sort(key=lambda x: x[5], reverse=True)

    #Write events to a CSV file. The with-block guarantees the handle is flushed
    #and closed, which the bare open() call did not.
    #NOTE(review): on Python 3 the csv docs recommend opening with newline="".
    with open(out_file, "w") as csv_file:
        w = csv.writer(csv_file)
        w.writerow(["start_date", "end_date", "max_mahal", "mahal_quant", "duration",
                    "hours_above_thresh", "max_pace_dev", "min_pace_dev", "worst_trip"])

        for event in eventList:
            #Unpacking enforces that every event has exactly nine fields
            [start_date, end_date, max_mahal, mahal_quant, duration,
             hours_above_thresh, max_pace_dev, min_pace_dev, worst_trip] = event

            formattedEvent = [start_date, end_date, "%.2f" % max_mahal,
                              "%.3f" % mahal_quant, duration, hours_above_thresh,
                              "%.2f" % max_pace_dev, "%.2f" % min_pace_dev, worst_trip]
            w.writerow(formattedEvent)
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries,
                      threshold_quant=.95):
    """Label every time step as event / non-event by decoding a two-state HMM.

    Params:
        mahal_timeseries - mapping from date key to mahal value; its keys define
            the time axis.
        c_timeseries - mapping over the same keys; a value equal to 1 marks the
            step as an outlier regardless of the threshold.
        global_pace_timeseries - mapping over the same keys; also passed to
            getExpectedPace().
        threshold_quant - quantile handed to getQuantile() to obtain the outlier
            threshold on the mahal values.
    Returns:
        (events, predictions) - events is the list produced by get_all_events(),
        sorted in descending order of element 2; predictions is the state
        sequence returned by model.decode().
    """
    # Chronological key order; every per-date list below follows it
    sorted_dates = sorted(mahal_timeseries)
    expected_pace_timeseries, _unused_sd = getExpectedPace(global_pace_timeseries)

    # Line the series up on the same chronological axis
    mahal_list = [mahal_timeseries[key] for key in sorted_dates]
    c_list = [c_timeseries[key] for key in sorted_dates]
    global_pace_list = [global_pace_timeseries[key] for key in sorted_dates]
    expected_pace_list = [expected_pace_timeseries[key] for key in sorted_dates]

    # Threshold = requested quantile of the observed values
    threshold = getQuantile(sorted(mahal_list), threshold_quant)

    # Observation symbols: 1 for an outlier step, 0 otherwise
    symbols = [
        1 if (mahal_value > threshold or c_value == 1) else 0
        for mahal_value, c_value in zip(mahal_list, c_list)
    ]

    # Hidden states: 0 = no event, 1 = event.
    # The near-identity transition matrix makes state switches rare, so the
    # decoded sequence is smooth rather than flickering.
    trans_matrix = array([[.999, .001],
                          [.001, .999]])

    # State 0 mostly emits symbol 0; state 1 emits symbol 1 more often than not
    emission_matrix = array([[.95, .05],
                             [.4, .6]])

    model = MultinomialHMM(n_components=2, transmat=trans_matrix)
    model.emissionprob_ = emission_matrix

    # Decode the state sequence; the first return value is not used
    _lnl, predictions = model.decode(symbols)

    events = get_all_events(predictions, sorted_dates, mahal_list,
                            global_pace_list, expected_pace_list)

    # Longest events first (element 2 is the sort field)
    events.sort(key=lambda ev: ev[2], reverse=True)
    return events, predictions
def saveEvents(timeSegments, mahal_timeseries, zscore_timeseries, global_pace_timeseries,
               out_file, sorted_mahal=None, mahal_threshold=None):
    """Compute the properties of every flagged time segment and write them to a CSV file.

    Params:
        timeSegments - iterable of segments. Each segment exposes state, start_id
            and end_id; the container itself exposes sorted_dates, which the two
            ids index into to obtain the event's start and end keys.
        mahal_timeseries, zscore_timeseries, global_pace_timeseries - forwarded
            unchanged to computeEventProperties(); global_pace_timeseries is also
            passed to getExpectedPace().
        out_file - path of the CSV file to (over)write.
        sorted_mahal, mahal_threshold - forwarded unchanged to computeEventProperties().
    Returns:
        None. The only output is the CSV file.
    """
    #Compute expected pace and variance (only the expected pace is used here)
    (expected_pace_timeseries, sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    eventList = []
    #Iterate through the TimeSegments
    for segment in timeSegments:
        #Only segments whose state compares equal to True are events
        if segment.state == True:
            start_key = timeSegments.sorted_dates[segment.start_id]
            end_key = timeSegments.sorted_dates[segment.end_id]

            #Compute event properties
            event = computeEventProperties(start_key, end_key, mahal_timeseries,
                                           global_pace_timeseries, expected_pace_timeseries,
                                           zscore_timeseries, sorted_mahal=sorted_mahal,
                                           mahal_threshold=mahal_threshold)
            eventList.append(event)

    #Sort events by element 5, in descending order. In the column layout written
    #below, element 5 is hours_above_thresh (duration is element 4).
    #NOTE(review): the previous comment said "duration" - confirm which is intended.
    eventList.sort(key=lambda x: x[5], reverse=True)

    #Write events to a CSV file. The with-block guarantees the handle is flushed
    #and closed, which the bare open() call did not.
    #NOTE(review): on Python 3 the csv docs recommend opening with newline="".
    with open(out_file, "w") as csv_file:
        w = csv.writer(csv_file)
        w.writerow([
            "start_date", "end_date", "max_mahal", "mahal_quant", "duration",
            "hours_above_thresh", "max_pace_dev", "min_pace_dev", "worst_trip"
        ])

        for event in eventList:
            #Unpacking enforces that every event has exactly nine fields
            [
                start_date, end_date, max_mahal, mahal_quant, duration,
                hours_above_thresh, max_pace_dev, min_pace_dev, worst_trip
            ] = event

            formattedEvent = [
                start_date, end_date, "%.2f" % max_mahal, "%.3f" % mahal_quant,
                duration, hours_above_thresh, "%.2f" % max_pace_dev,
                "%.2f" % min_pace_dev, worst_trip
            ]
            w.writerow(formattedEvent)
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries,
                      threshold_quant=.95, trans_matrix=DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX, initial_state=None):
    """Label every time step as event / non-event by decoding a two-state HMM.

    Params:
        mahal_timeseries - mapping from date key to mahal value; its keys define
            the time axis.
        c_timeseries - mapping over the same keys; a value equal to 1 marks the
            step as an outlier regardless of the threshold.
        global_pace_timeseries - mapping over the same keys; also passed to
            getExpectedPace().
        threshold_quant - quantile handed to getQuantile() to obtain the outlier
            threshold on the mahal values.
        trans_matrix - passed to MultinomialHMM as transmat.
        emission_matrix - assigned to the model's emissionprob_.
        initial_state - passed to MultinomialHMM as startprob.
    Returns:
        (events, predictions) - events is the list produced by get_all_events(),
        sorted in descending order of element 2; predictions is the state
        sequence returned by model.decode().
    """
    # Chronological key order; every per-date list below follows it
    sorted_dates = sorted(mahal_timeseries)
    expected_pace_timeseries, _unused_sd = getExpectedPace(global_pace_timeseries)

    # Line the series up on the same chronological axis
    mahal_list = [mahal_timeseries[key] for key in sorted_dates]
    c_list = [c_timeseries[key] for key in sorted_dates]
    global_pace_list = [global_pace_timeseries[key] for key in sorted_dates]
    expected_pace_list = [expected_pace_timeseries[key] for key in sorted_dates]

    # Threshold = requested quantile of the observed values
    threshold = getQuantile(sorted(mahal_list), threshold_quant)

    # Observation symbols: 1 for an outlier step, 0 otherwise
    symbols = [
        1 if (mahal_value > threshold or c_value == 1) else 0
        for mahal_value, c_value in zip(mahal_list, c_list)
    ]

    # Build the two-state model from the supplied matrices
    model = MultinomialHMM(n_components=2, transmat=trans_matrix,
                           startprob=initial_state)
    model.emissionprob_ = emission_matrix

    # Decode the state sequence; the first return value is not used
    _lnl, predictions = model.decode(symbols)

    events = get_all_events(predictions, sorted_dates, mahal_list,
                            global_pace_list, expected_pace_list)

    # Longest events first (element 2 is the sort field)
    events.sort(key=lambda ev: ev[2], reverse=True)
    return events, predictions
def detect_events_hmm(mahal_timeseries,
                      c_timeseries,
                      global_pace_timeseries,
                      threshold_quant=.95,
                      trans_matrix=DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX,
                      initial_state=None):
    """Decode a two-state HMM over thresholded outlier symbols and extract events.

    Params:
        mahal_timeseries - mapping from date key to mahal value; its sorted keys
            give the time axis used for all other series.
        c_timeseries - mapping over the same keys; entries equal to 1 force the
            corresponding symbol to 1.
        global_pace_timeseries - mapping over the same keys; also the input of
            getExpectedPace().
        threshold_quant - quantile given to getQuantile() to derive the threshold.
        trans_matrix - handed to MultinomialHMM as transmat.
        emission_matrix - stored on the model as emissionprob_.
        initial_state - handed to MultinomialHMM as startprob.
    Returns:
        A pair (events, predictions): the output of get_all_events() sorted by
        element 2 in descending order, and the decoded state sequence.
    """
    # Time axis: keys in chronological order
    sorted_dates = sorted(mahal_timeseries)
    pace_stats = getExpectedPace(global_pace_timeseries)
    (expected_pace_timeseries, _sd_pace_timeseries) = pace_stats

    # Per-date value lists, all aligned with sorted_dates
    mahal_list = [mahal_timeseries[stamp] for stamp in sorted_dates]
    c_list = [c_timeseries[stamp] for stamp in sorted_dates]
    global_pace_list = [global_pace_timeseries[stamp] for stamp in sorted_dates]
    expected_pace_list = [expected_pace_timeseries[stamp] for stamp in sorted_dates]

    # The threshold is the requested quantile of the sorted values
    ascending_mahal = sorted(mahal_list)
    threshold = getQuantile(ascending_mahal, threshold_quant)

    # Symbol is 1 when the step is an outlier, 0 when it is not
    symbols = []
    for mahal_value, c_value in zip(mahal_list, c_list):
        is_outlier = mahal_value > threshold or c_value == 1
        symbols.append(1 if is_outlier else 0)

    # Configure the model with the caller-supplied probabilities
    model = MultinomialHMM(n_components=2,
                           transmat=trans_matrix,
                           startprob=initial_state)
    model.emissionprob_ = emission_matrix

    # Only the decoded states are needed; the first return value is discarded
    decoded = model.decode(symbols)
    _lnl, predictions = decoded

    events = get_all_events(predictions, sorted_dates, mahal_list,
                            global_pace_list, expected_pace_list)

    # Order events by element 2, largest first
    events.sort(key=lambda ev: ev[2], reverse=True)
    return events, predictions
def detectWindowedEvents(mahal_timeseries, zscore_timeseries, global_pace_timeseries,
                         out_file, window_size=6, threshold_quant=.95,
                         start_date=None, end_date=None):
    """Detect events with a sliding window, write them to a CSV file and return them.

    The date range is scanned in steps of window_size hours. Consecutive windows
    for which crossesThreshold() is true are merged into one event; the event is
    emitted when the first non-crossing window is reached.

    Params:
        mahal_timeseries - mapping from date key to mahal value; used to derive
            the threshold and passed to crossesThreshold() / computeEventProperties().
        zscore_timeseries, global_pace_timeseries - forwarded to
            computeEventProperties(); global_pace_timeseries is also passed to
            getExpectedPace().
        out_file - path of the CSV file to (over)write.
        window_size - window length and step, in hours.
        threshold_quant - quantile handed to getQuantile() to obtain the threshold.
        start_date - datetime at which scanning starts; defaults to 2010-01-01,
            the previously hard-coded value.
        end_date - datetime at which scanning stops; defaults to 2014-01-01,
            the previously hard-coded value.
    Returns:
        The list of events (as returned by computeEventProperties()), sorted in
        descending order of element 5.
    """
    logMsg("Detecting events at %d%% bound" % int(threshold_quant * 100))

    #Use the quantile of the sorted values to determine the threshold
    sorted_mahal = sorted(mahal_timeseries[d] for d in sorted(mahal_timeseries))
    threshold = getQuantile(sorted_mahal, threshold_quant)

    # Get the expected global pace (the standard deviation is not used here)
    (expected_pace_timeseries, sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    #Fall back to the historical scan range when the caller does not supply one
    if start_date is None:
        start_date = datetime(2010, 1, 1)
    if end_date is None:
        end_date = datetime(2014, 1, 1)
    shift = timedelta(hours=window_size)

    prev_above_threshold = False
    current_event_start = None
    current_event_end = None
    eventList = []
    for date in dateRange(start_date, end_date, shift):
        window_end = date + shift
        if crossesThreshold(date, window_end, mahal_timeseries, threshold):
            #A crossing window either opens a new event or extends the open one
            if not prev_above_threshold:
                current_event_start = date
            current_event_end = window_end
            prev_above_threshold = True
        else:
            #First quiet window after an event: close the event and record it
            if prev_above_threshold:
                start_key = keyFromDatetime(current_event_start)
                end_key = keyFromDatetime(current_event_end)
                event = computeEventProperties(start_key, end_key, mahal_timeseries,
                                               global_pace_timeseries,
                                               expected_pace_timeseries,
                                               zscore_timeseries,
                                               sorted_mahal=sorted_mahal,
                                               mahal_threshold=threshold)
                eventList.append(event)
            prev_above_threshold = False
    #NOTE(review): an event that is still open when the date range ends is never
    #emitted, because events are only closed by a non-crossing window - confirm
    #whether that is intended.

    #Sort events by element 5, in descending order. In the column layout written
    #below, element 5 is hours_above_thresh (duration is element 4).
    #NOTE(review): the previous comment said "duration" - confirm which is intended.
    eventList.sort(key=lambda x: x[5], reverse=True)

    #Write events to a CSV file. The with-block guarantees the handle is flushed
    #and closed, which the bare open() call did not.
    #NOTE(review): on Python 3 the csv docs recommend opening with newline="".
    with open(out_file, "w") as csv_file:
        w = csv.writer(csv_file)
        w.writerow(["start_date", "end_date", "max_mahal", "mahal_quant", "duration",
                    "hours_above_thresh", "max_pace_dev", "min_pace_dev", "worst_trip"])

        for event in eventList:
            #Unpacking enforces nine fields; distinct names avoid clobbering the
            #start_date / end_date parameters
            [event_start, event_end, max_mahal, mahal_quant, duration,
             hours_above_thresh, max_pace_dev, min_pace_dev, worst_trip] = event

            formattedEvent = [event_start, event_end, "%.2f" % max_mahal,
                              "%.3f" % mahal_quant, duration, hours_above_thresh,
                              "%.2f" % max_pace_dev, "%.2f" % min_pace_dev, worst_trip]
            w.writerow(formattedEvent)

    return eventList
def detectWindowedEvents(mahal_timeseries,
                         zscore_timeseries,
                         global_pace_timeseries,
                         out_file,
                         window_size=6,
                         threshold_quant=.95,
                         start_date=None,
                         end_date=None):
    """Detect events with a sliding window, write them to a CSV file and return them.

    The date range is scanned in steps of window_size hours. Consecutive windows
    for which crossesThreshold() is true are merged into one event; the event is
    emitted when the first non-crossing window is reached.

    Params:
        mahal_timeseries - mapping from date key to mahal value; used to derive
            the threshold and passed to crossesThreshold() / computeEventProperties().
        zscore_timeseries, global_pace_timeseries - forwarded to
            computeEventProperties(); global_pace_timeseries is also passed to
            getExpectedPace().
        out_file - path of the CSV file to (over)write.
        window_size - window length and step, in hours.
        threshold_quant - quantile handed to getQuantile() to obtain the threshold.
        start_date - datetime at which scanning starts; defaults to 2010-01-01,
            the previously hard-coded value.
        end_date - datetime at which scanning stops; defaults to 2014-01-01,
            the previously hard-coded value.
    Returns:
        The list of events (as returned by computeEventProperties()), sorted in
        descending order of element 5.
    """
    logMsg("Detecting events at %d%% bound" % int(threshold_quant * 100))

    #Use the quantile of the sorted values to determine the threshold
    sorted_mahal = sorted(mahal_timeseries[d] for d in sorted(mahal_timeseries))
    threshold = getQuantile(sorted_mahal, threshold_quant)

    # Get the expected global pace (the standard deviation is not used here)
    (expected_pace_timeseries,
     sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    #Fall back to the historical scan range when the caller does not supply one
    if start_date is None:
        start_date = datetime(2010, 1, 1)
    if end_date is None:
        end_date = datetime(2014, 1, 1)
    shift = timedelta(hours=window_size)

    prev_above_threshold = False
    current_event_start = None
    current_event_end = None
    eventList = []
    for date in dateRange(start_date, end_date, shift):
        window_end = date + shift
        if crossesThreshold(date, window_end, mahal_timeseries, threshold):
            #A crossing window either opens a new event or extends the open one
            if not prev_above_threshold:
                current_event_start = date
            current_event_end = window_end
            prev_above_threshold = True
        else:
            #First quiet window after an event: close the event and record it
            if prev_above_threshold:
                start_key = keyFromDatetime(current_event_start)
                end_key = keyFromDatetime(current_event_end)
                event = computeEventProperties(
                    start_key,
                    end_key,
                    mahal_timeseries,
                    global_pace_timeseries,
                    expected_pace_timeseries,
                    zscore_timeseries,
                    sorted_mahal=sorted_mahal,
                    mahal_threshold=threshold)
                eventList.append(event)
            prev_above_threshold = False
    #NOTE(review): an event that is still open when the date range ends is never
    #emitted, because events are only closed by a non-crossing window - confirm
    #whether that is intended.

    #Sort events by element 5, in descending order. In the column layout written
    #below, element 5 is hours_above_thresh (duration is element 4).
    #NOTE(review): the previous comment said "duration" - confirm which is intended.
    eventList.sort(key=lambda x: x[5], reverse=True)

    #Write events to a CSV file. The with-block guarantees the handle is flushed
    #and closed, which the bare open() call did not.
    #NOTE(review): on Python 3 the csv docs recommend opening with newline="".
    with open(out_file, "w") as csv_file:
        w = csv.writer(csv_file)
        w.writerow([
            "start_date", "end_date", "max_mahal", "mahal_quant", "duration",
            "hours_above_thresh", "max_pace_dev", "min_pace_dev", "worst_trip"
        ])

        for event in eventList:
            #Unpacking enforces nine fields; distinct names avoid clobbering the
            #start_date / end_date parameters
            [
                event_start, event_end, max_mahal, mahal_quant, duration,
                hours_above_thresh, max_pace_dev, min_pace_dev, worst_trip
            ] = event

            formattedEvent = [
                event_start, event_end, "%.2f" % max_mahal, "%.3f" % mahal_quant,
                duration, hours_above_thresh, "%.2f" % max_pace_dev,
                "%.2f" % min_pace_dev, worst_trip
            ]
            w.writerow(formattedEvent)

    return eventList