Example #1
def train_model(user_name, table_name, start, end, pos, neg, step):
    # Train a model for a user from the data in table_name between start and end,
    # persist it to disk, and return it.
    # start and end are date strings such as '20180309'.
    clf_name = get_clf_name(user_name, pos, neg, start)
    cnxs = localization.read_local_data(start, end, table_name)
    intervals, ap_list, cnx_density, ap_freq = localization.create_intervals(cnxs)
    data = localization.create_training_data(intervals, ap_list, pos, neg, cnx_density)
    clf = localization.semi_supervised_learning(data, step)
    model = Model(clf, ap_list, cnx_density, ap_freq)
    with open(clf_name, 'wb') as f:
        pickle.dump(model, f)
    return model
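
A minimal usage sketch, assuming the surrounding module's imports and helpers (pickle, localization, Model, get_clf_name) are available; the user name, table name, and threshold values below are placeholders, not values from the original project:

# Hypothetical call; 'alice', 'cnx_table', and the numeric arguments are
# illustrative only.
model = train_model('alice', 'cnx_table', '20180301', '20180309',
                    pos=0.9, neg=0.1, step=0.05)
print(len(model.ap_list))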
Example #2
def coarse_baseline(query_time, user, table):
    # Returns (0, '') when the user is judged to be outside.
    # Returns (1, ap) when inside, where ap is the associated access point.

    # Assume location data exists for the user; load all events on the queried day
    date_str = str(query_time.date()).replace('-', '')
    all_events = localization.read_local_data(date_str, date_str, table, True,
                                              user)
    if len(all_events) <= 1:
        print(
            'Query for %s at %s: Not enough connection activities on that day.'
            % (user, query_time))
        return 0, ''

    n = len(all_events)
    if query_time < all_events[0][0] or query_time >= all_events[n - 1][0]:
        print(
            f'Baseline: query time {query_time} for {user} falls outside the span '
            f'from the first event {all_events[0]} to the last event {all_events[-1]}'
        )
        return 0, ''

    # Binary search for p, the index of the last event at or before query_time
    left = 0
    right = n - 1
    while left < right:
        mid = (left + right + 1) // 2
        if query_time < all_events[mid][0]:
            right = mid - 1
        else:
            left = mid
    p = left

    # Decide online/offline from the duration between the two surrounding events
    if p == n - 1:  # defensive; unreachable given the range check above
        p = n - 2

    duration = (all_events[p + 1][0] - all_events[p][0]).total_seconds()
    if duration < 3600:  # events less than an hour apart: assume still inside
        return 1, all_events[p][1]
    else:
        return 0, ''
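
For reference, the hand-rolled predecessor search above can be cross-checked against Python's bisect module; a minimal standalone sketch with made-up timestamps:

import bisect
from datetime import datetime

# The predecessor index is equivalent to bisect_right minus one.
times = [datetime(2018, 3, 9, h) for h in (8, 9, 12, 17)]
query_time = datetime(2018, 3, 9, 10, 30)
p = bisect.bisect_right(times, query_time) - 1  # last event at or before query_time
assert times[p] <= query_time < times[p + 1]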
Example #3
def train_bl_model(user_name,
                   table_name,
                   start,
                   end,
                   pos,
                   neg,
                   step,
                   validity,
                   simu=False):
    # Train a model for building-level localization and persist it to disk
    clf_name = get_bl_clf_name(user_name, pos, neg, start, end, validity)
    all_events = localization.read_local_data(start, end, table_name, simu,
                                              user_name)
    gaps, cnx_density, ap_cnx_count = localization.create_gaps(
        all_events, validity)
    data = localization.create_training_data(gaps, list(ap_cnx_count.keys()),
                                             pos, neg, cnx_density)
    clf = localization.semi_supervised_learning(data, step)
    model = Model(clf, list(ap_cnx_count.keys()), cnx_density, ap_cnx_count)
    with open(clf_name, 'wb') as f:
        pickle.dump(model, f)
    return model
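
A hedged usage sketch for the building-level trainer; names and values are placeholders, and validity is assumed to be in seconds (suggested by the validity_second default in Example #6):

# Hypothetical call; 'alice', 'cnx_table', and the thresholds are examples only.
bl_model = train_bl_model('alice', 'cnx_table', '20180301', '20180331',
                          pos=0.9, neg=0.1, step=0.05, validity=3600)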
Example #4
def answer_query(query_time, user_name, table_name, start=start_date, end=end_date,
                 pos=pos_threshold, neg=neg_threshold, step=step_size):
    # Load the user's model if one exists on disk; otherwise train a new one.
    # query_time is a Python datetime.
    date_str = str(query_time.date()).replace('-', '')
    all_entries = localization.read_local_data(date_str, date_str, table_name)
    if len(all_entries) <= 1:
        print('Query %s for %s: Not enough connection activities on that day.' % (query_time, user_name))
        return 0, ''
    intervals, _, _, _ = localization.create_intervals(all_entries)
    interval = localization.find_interval(query_time, intervals)
    if interval is None:
        print('Query %s for %s: Query time before the first connection or after the last on that day.' % (
            query_time, user_name))
        return 0, ''
    try:
        with open(get_clf_name(user_name, pos, neg, start), 'rb') as f:
            model = pickle.load(f)
    except FileNotFoundError:
        model = train_model(user_name, table_name, start, end, pos, neg, step)
    state, ap = localization.predict_an_interval(interval, model.ap_list, model.clf,
                                                 model.density, model.ap_freq)
    return state, ap
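
A minimal query sketch, assuming the module-level defaults (start_date, end_date, pos_threshold, neg_threshold, step_size) are defined; the user and table names are hypothetical:

from datetime import datetime

# Hypothetical query; 'alice' and 'cnx_table' are placeholder names.
state, ap = answer_query(datetime(2018, 3, 9, 10, 30), 'alice', 'cnx_table')
print('inside, via %s' % ap if state == 1 else 'outside')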
Example #5
def train_rl_model(user_name,
                   table_name,
                   start,
                   end,
                   pos,
                   neg,
                   step,
                   validity,
                   freq,
                   simu=False):
    # Train a model for region-level localization and persist it to disk
    clf_name = get_rl_clf_name(user_name, pos, neg, start, end, validity, freq)
    all_events = localization.read_local_data(start, end, table_name, simu,
                                              user_name)
    gaps, cnx_density, ap_cnx_count = localization.create_gaps(
        all_events, validity)
    data = localization.create_region_train_data(gaps,
                                                 list(ap_cnx_count.keys()),
                                                 pos, neg, cnx_density,
                                                 freq, ap_cnx_count)
    clf = localization.semi_supervised_learning_new(data, step)
    with open(clf_name, 'wb') as f:
        pickle.dump(clf, f)
    return clf
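
A matching sketch for the region-level trainer; every name and number is a placeholder, and the reading of freq as a per-AP connection-count threshold is an assumption:

# Hypothetical call; freq=5 is an illustrative threshold, not a project value.
rl_clf = train_rl_model('alice', 'cnx_table', '20180301', '20180331',
                        pos=0.9, neg=0.1, step=0.05, validity=3600, freq=5)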
Example #6
def answer_query_new(query_time,
                     user_name,
                     table_name,
                     start=start_date,
                     end=end_date,
                     pos=pos_threshold,
                     neg=neg_threshold,
                     validity=validity_second,
                     pos_r=pos_r_th,
                     neg_r=neg_r_th,
                     freq=freq_th,
                     fast=False,
                     step=step_size,
                     simu=False):
    # Assume location data exists for the user; load all events on the queried day
    date_str = str(query_time.date()).replace('-', '')
    read_start = process_time()
    all_entries = localization.read_local_data(date_str, date_str, table_name,
                                               simu, user_name)
    read_end = process_time()
    print('data read time: %.2f ms' % (1000 * (read_end - read_start), ))
    run_start = process_time()
    if len(all_entries) <= 1:
        if len(all_entries) == 1 and all_entries[0][0] == query_time:
            return 1, all_entries[0][1]
        print(
            'Query for %s at %s: Not enough connection activities on that day.'
            % (user_name, query_time))
        return 0, ''

    # First check whether the user is online; if so, the query is answered.
    state, ap = localization.online_or_not(query_time, all_entries, validity,
                                           user_name)
    if state in (0, 1):
        print('Execution time: %.2f ms' %
              ((process_time() - run_start) * 1000, ))
        return state, ap

    # Further analysis is needed, so fall back to the semi-supervised models.
    # Load (or train) the building-level model first.
    try:
        with open(get_bl_clf_name(user_name, pos, neg, start, end, validity),
                  'rb') as f:
            bl_model = pickle.load(f)
    except FileNotFoundError:
        bl_model = train_bl_model(user_name, table_name, start, end, pos, neg,
                                  step, validity)

    # Then the region-level model (skipped entirely in fast mode)
    if fast:
        rl_clf = 0
    else:
        try:
            with open(
                    get_rl_clf_name(user_name, pos_r, neg_r, start, end,
                                    validity, freq), 'rb') as f:
                rl_clf = pickle.load(f)
        except FileNotFoundError:
            rl_clf = train_rl_model(user_name, table_name, start, end, pos_r,
                                    neg_r, step, validity, freq)

    # state here is neither 0 nor 1; it presumably carries the gap enclosing
    # query_time for the models to classify.
    state, ap = localization.predict_a_gap(state, bl_model, rl_clf, fast)
    print('Execution time: %.2f ms' % ((process_time() - run_start) * 1000, ))
    return state, ap
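
A minimal end-to-end sketch, assuming the module-level defaults (start_date, validity_second, freq_th, and the threshold globals) are defined; user and table names are placeholders:

from datetime import datetime

# Hypothetical query; fast=True skips the region-level model as in the code above.
state, ap = answer_query_new(datetime(2018, 3, 9, 14, 0), 'alice', 'cnx_table',
                             fast=True)
print('state=%s ap=%s' % (state, ap))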