def train_model(user_name, table_name, start, end, pos, neg, step):
    """Train and persist a localization model for one user.

    Training data is read from ``table_name`` between ``start`` and ``end``
    (date strings formatted like '20180309').  The fitted model is pickled
    to the path produced by ``get_clf_name`` and also returned.

    Returns:
        Model: the trained model (classifier + AP metadata).
    """
    clf_name = get_clf_name(user_name, pos, neg, start)
    cnxs = localization.read_local_data(start, end, table_name)
    intervals, ap_list, cnx_density, ap_freq = localization.create_intervals(cnxs)
    data = localization.create_training_data(intervals, ap_list, pos, neg, cnx_density)
    clf = localization.semi_supervised_learning(data, step)
    model = Model(clf, ap_list, cnx_density, ap_freq)
    # Context manager closes the handle even on error; the original
    # pickle.dump(model, open(...)) leaked the file object.
    with open(clf_name, 'wb') as f:
        pickle.dump(model, f)
    return model
def coarse_baseline(query_time, user, table, online_window=3600):
    """Coarse inside/outside baseline for one user at one instant.

    Loads all connection events for the queried day and decides whether
    the user was inside based on the gap between the two events that
    bracket ``query_time``.

    Args:
        query_time: datetime of the query.
        user: user identifier.
        table: table to read events from.
        online_window: max gap (seconds) between bracketing events for the
            user to still count as inside.  Defaults to 3600, matching the
            original hard-coded value.

    Returns:
        (0, '') if judged outside, or (1, ap) where ``ap`` is the access
        point of the predecessor event if judged inside.
    """
    # Local import: the file's top-of-file import block is outside this view.
    import bisect
    date_str = str(query_time.date()).replace('-', '')
    # NOTE(review): the 4th positional arg presumably selects simulated
    # data, mirroring read_local_data(..., simu, user) elsewhere — confirm.
    all_events = localization.read_local_data(date_str, date_str, table, True, user)
    if len(all_events) == 0 or len(all_events) == 1:
        print(
            'Query for %s at %s: Not enough connection activities on that day.'
            % (user, query_time))
        return 0, ''
    n = len(all_events)
    if query_time < all_events[0][0] or query_time >= all_events[n - 1][0]:
        print(
            f'B: Query time {query_time} for {user} is outside the first event {all_events[0]} and the last event {all_events[-1]}'
        )
        return 0, ''
    # Predecessor index: largest p with all_events[p][0] <= query_time.
    # bisect_right on the timestamps replaces the original hand-rolled
    # binary search; the range guard above guarantees 0 <= p <= n - 2,
    # so the original's `if p == n - 1` patch-up was dead code.
    times = [event[0] for event in all_events]
    p = bisect.bisect_right(times, query_time) - 1
    # Inside iff the bracketing events are close enough together.
    duration = (all_events[p + 1][0] - all_events[p][0]).total_seconds()
    if duration < online_window:
        return 1, all_events[p][1]
    return 0, ''
def train_bl_model(user_name, table_name, start, end, pos, neg, step, validity, simu=False):
    """Train and persist the building-level localization model.

    Reads events from ``table_name`` between ``start`` and ``end``, builds
    connection gaps with the given ``validity`` window, trains a
    semi-supervised classifier and pickles the resulting Model.

    Returns:
        Model: the trained building-level model.
    """
    clf_name = get_bl_clf_name(user_name, pos, neg, start, end, validity)
    all_events = localization.read_local_data(start, end, table_name, simu, user_name)
    gaps, cnx_density, ap_cnx_count = localization.create_gaps(
        all_events, validity)
    data = localization.create_training_data(gaps, list(ap_cnx_count.keys()),
                                             pos, neg, cnx_density)
    clf = localization.semi_supervised_learning(data, step)
    model = Model(clf, list(ap_cnx_count.keys()), cnx_density, ap_cnx_count)
    # Context manager closes the handle even on error; the original
    # pickle.dump(model, open(...)) leaked the file object.
    with open(clf_name, 'wb') as f:
        pickle.dump(model, f)
    return model
def answer_query(query_time, user_name, table_name, start=start_date, end=end_date, pos=pos_threshold, neg=neg_threshold, step=step_size):
    """Answer an inside/outside query using the interval-based model.

    Loads the cached model for the user if one exists, otherwise trains
    one on the fly, then predicts the interval containing ``query_time``.

    Args:
        query_time: datetime of the query.

    Returns:
        (state, ap): 0/'' if outside or undecidable, otherwise the
        predicted state and access point.
    """
    date_str = str(query_time.date()).replace('-', '')
    all_entries = localization.read_local_data(date_str, date_str, table_name)
    if len(all_entries) == 0 or len(all_entries) == 1:
        print('Query %s for %s: No connection activities on that day.' % (query_time, user_name))
        return 0, ''
    intervals, _, _, _ = localization.create_intervals(all_entries)
    interval = localization.find_interval(query_time, intervals)
    if interval is None:
        print('Query %s for %s: Query time before the first connection or after the last on that day.' % (
            query_time, user_name))
        return 0, ''
    try:
        # `with` closes the model file; the original opened it and never
        # closed the handle.
        with open(get_clf_name(user_name, pos, neg, start), 'rb') as f:
            model = pickle.load(f)
    except FileNotFoundError:
        model = train_model(user_name, table_name, start, end, pos, neg, step)
    state, ap = localization.predict_an_interval(interval, model.ap_list, model.clf, model.density,
                                                 model.ap_freq)
    return state, ap
def train_rl_model(user_name, table_name, start, end, pos, neg, step, validity, freq, simu=False):
    """Train and persist the region-level localization classifier.

    Reads events from ``table_name`` between ``start`` and ``end``, builds
    gaps with the ``validity`` window and trains a region-level classifier
    using AP frequency threshold ``freq``.

    Returns:
        The trained classifier (pickled to the path from get_rl_clf_name).
    """
    clf_name = get_rl_clf_name(user_name, pos, neg, start, end, validity, freq)
    all_events = localization.read_local_data(start, end, table_name, simu, user_name)
    gaps, cnx_density, ap_cnx_count = localization.create_gaps(
        all_events, validity)
    # BUG FIX: the original passed the module-level freq_th here, silently
    # ignoring the `freq` parameter that callers (e.g. answer_query_new)
    # supply.
    data = localization.create_region_train_data(gaps, list(ap_cnx_count.keys()),
                                                 pos, neg, cnx_density, freq, ap_cnx_count)
    clf = localization.semi_supervised_learning_new(data, step)
    # Context manager closes the handle even on error; the original
    # pickle.dump(clf, open(...)) leaked the file object.
    with open(clf_name, 'wb') as f:
        pickle.dump(clf, f)
    return clf
def answer_query_new(query_time, user_name, table_name, start=start_date, end=end_date, pos=pos_threshold, neg=neg_threshold, validity=validity_second, pos_r=pos_r_th, neg_r=neg_r_th, freq=freq_th, fast=False, step=step_size, simu=False):
    """Answer an inside/outside query with the gap-based two-level models.

    Pipeline: read the day's events, try a direct online/offline decision,
    and only if that is inconclusive fall back to the building-level model
    and (unless ``fast``) the region-level classifier, training either on
    demand when its pickle is missing.

    Returns:
        (state, ap): 0/'' if outside or undecidable, otherwise the
        predicted state and access point.
    """
    # Assume the location data for a user exists; load all events that day.
    date_str = str(query_time.date()).replace('-', '')
    read_start = process_time()
    all_entries = localization.read_local_data(date_str, date_str, table_name,
                                               simu, user_name)
    read_end = process_time()
    print('data read time: %.2f ms' % (1000 * (read_end - read_start), ))
    run_start = process_time()
    if len(all_entries) == 0 or len(all_entries) == 1:
        # A single event exactly at the query time still counts as inside.
        if len(all_entries) == 1 and all_entries[0][0] == query_time:
            return 1, all_entries[0][1]
        print(
            'Query for %s at %s: Not enough connection activities on that day.'
            % (user_name, query_time))
        return 0, ''
    # First check whether online or not; if decided, work is done.
    state, ap = localization.online_or_not(query_time, all_entries, validity,
                                           user_name)
    if state == 0 or state == 1:
        print('Execution time: %.2f ms' % ((process_time() - run_start) * 1000, ))
        return state, ap
    # Inconclusive: `state` now describes the gap.  Load (or train) the
    # building-level model.  `with` closes the pickle files; the original
    # opened them and never closed the handles.
    try:
        with open(get_bl_clf_name(user_name, pos, neg, start, end, validity),
                  'rb') as f:
            bl_model = pickle.load(f)
    except FileNotFoundError:
        # BUG FIX: forward `simu` so on-demand training reads the same data
        # source as the query itself (the original always trained on real
        # data even for simulated queries).
        bl_model = train_bl_model(user_name, table_name, start, end, pos, neg,
                                  step, validity, simu)
    # Then the region-level model (skipped entirely in fast mode).
    if fast:
        rl_clf = 0
    else:
        try:
            with open(
                    get_rl_clf_name(user_name, pos_r, neg_r, start, end,
                                    validity, freq), 'rb') as f:
                rl_clf = pickle.load(f)
        except FileNotFoundError:
            # BUG FIX: forward `simu` here as well.
            rl_clf = train_rl_model(user_name, table_name, start, end, pos_r,
                                    neg_r, step, validity, freq, simu)
    state, ap = localization.predict_a_gap(state, bl_model, rl_clf, fast)
    print('Execution time: %.2f ms' % ((process_time() - run_start) * 1000, ))
    return state, ap